This is an automated email from the ASF dual-hosted git repository.

sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 855e4055675 HIVE-28148: Implement array_compact UDF to remove all nulls from an array (#5161) (Taraka Rama Rao Lethavadla reviewed by Sourabh Badhya)
855e4055675 is described below

commit 855e4055675e3c993a61f59501f783e641abaaa6
Author: tarak271 <ta...@cloudera.com>
AuthorDate: Fri Apr 5 16:45:17 2024 +0530

    HIVE-28148: Implement array_compact UDF to remove all nulls from an array (#5161) (Taraka Rama Rao Lethavadla reviewed by Sourabh Badhya)
---
 .../hadoop/hive/ql/exec/FunctionRegistry.java      |   1 +
 .../ql/udf/generic/GenericUDFArrayCompact.java     |  56 +++++++++
 .../ql/udf/generic/TestGenericUDFArrayCompact.java | 127 +++++++++++++++++++++
 .../queries/clientnegative/udf_array_compact_1.q   |   1 +
 .../queries/clientpositive/udf_array_compact.q     |  38 ++++++
 .../clientnegative/udf_array_compact_1.q.out       |   1 +
 .../clientpositive/llap/show_functions.q.out       |   2 +
 .../clientpositive/llap/udf_array_compact.q.out    | 112 ++++++++++++++++++
 8 files changed, 338 insertions(+)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 28f35c4a15f..c54a59f9516 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -617,6 +617,7 @@ public final class FunctionRegistry {
     system.registerGenericUDF("array_remove", GenericUDFArrayRemove.class);
     system.registerGenericUDF("array_position", GenericUDFArrayPosition.class);
     system.registerGenericUDF("array_append", GenericUDFArrayAppend.class);
+    system.registerGenericUDF("array_compact", GenericUDFArrayCompact.class);
     system.registerGenericUDF("deserialize", GenericUDFDeserialize.class);
     system.registerGenericUDF("sentences", GenericUDFSentences.class);
     system.registerGenericUDF("map_keys", GenericUDFMapKeys.class);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayCompact.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayCompact.java
new file mode 100644
index 00000000000..71f5526e126
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayCompact.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * GenericUDFArrayCompact.
+ */
+@Description(name = "array_compact", value = "_FUNC_(array) - Removes NULL 
elements from array.",
+    extended = "Example:\n" + "  > SELECT _FUNC_(array(1,NULL,3,NULL,4)) FROM 
src;\n" + "  [1,3,4]")
+public class GenericUDFArrayCompact extends AbstractGenericUDFArrayBase {
+  private static final String FUNC_NAME = "ARRAY_COMPACT";
+
+  public GenericUDFArrayCompact() {
+    super(FUNC_NAME, 1, 1, ObjectInspector.Category.LIST);
+  }
+
+  @Override
+  public Object evaluate(DeferredObject[] arguments) throws HiveException {
+    Object array = arguments[ARRAY_IDX].get();
+    int arrayLength = arrayOI.getListLength(array);
+    if (arrayLength == 0) {
+      return Collections.emptyList();
+    } else if (arrayLength < 0) {
+      return null;
+    }
+
+    List resultArray = new ArrayList<>(((ListObjectInspector) 
argumentOIs[ARRAY_IDX]).getList(array));
+    return resultArray.stream().filter(Objects::nonNull).map(o -> 
converter.convert(o)).collect(Collectors.toList());
+  }
+}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayCompact.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayCompact.java
new file mode 100644
index 00000000000..fb0ec6ed307
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayCompact.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import 
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static java.util.Arrays.asList;
+
+public class TestGenericUDFArrayCompact extends TestGenericUDFArray {
+
+  public TestGenericUDFArrayCompact() {
+    super.udf = new GenericUDFArrayCompact();
+  }
+
+  @Test public void testPrimitive() throws HiveException {
+    ObjectInspector[] inputOIs = { 
ObjectInspectorFactory.getStandardListObjectInspector(
+        PrimitiveObjectInspectorFactory.writableIntObjectInspector) };
+    udf.initialize(inputOIs);
+
+    Object i1 = new IntWritable(3);
+    Object i2 = new IntWritable(1);
+    Object i3 = new IntWritable(2);
+    Object i4 = new IntWritable(4);
+    runAndVerify(asList(i1, i2, null, i3, i4), asList(i1, i2, i3, i4));
+
+    i1 = new FloatWritable(13.3f);
+    i2 = new FloatWritable(1.1f);
+    i3 = new FloatWritable(3.3f);
+    i4 = new FloatWritable(2.20f);
+    runAndVerify(asList(i1, i2, i3, i4, null), asList(i1, i2, i3, i4));
+  }
+
+  @Test public void testList() throws HiveException {
+    ObjectInspector[] inputOIs = { 
ObjectInspectorFactory.getStandardListObjectInspector(
+        ObjectInspectorFactory.getStandardListObjectInspector(
+            PrimitiveObjectInspectorFactory.writableStringObjectInspector)) };
+    udf.initialize(inputOIs);
+
+    Object i1 = asList(new Text("aa1"), new Text("dd"), new Text("cc"), new 
Text("bb"));
+    Object i2 = asList(new Text("aa2"), new Text("cc"), new Text("ba"), new 
Text("dd"));
+    Object i3 = asList(new Text("aa3"), new Text("cc"), new Text("dd"), new 
Text("ee"), new Text("bb"));
+    Object i4 = asList(new Text("aa4"), new Text("cc"), new Text("ddd"), new 
Text("bb"));
+    runAndVerify(asList(i1, i2, null, i3, null, i4), asList(i1, i2, i3, i4));
+  }
+
+  @Test public void testStruct() throws HiveException {
+    ObjectInspector[] inputOIs = { 
ObjectInspectorFactory.getStandardListObjectInspector(
+        ObjectInspectorFactory.getStandardStructObjectInspector(asList("f1", 
"f2", "f3", "f4"),
+            
asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+                PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
+                PrimitiveObjectInspectorFactory.writableDateObjectInspector,
+                ObjectInspectorFactory.getStandardListObjectInspector(
+                    
PrimitiveObjectInspectorFactory.writableIntObjectInspector)))) };
+    udf.initialize(inputOIs);
+
+    Object i1 = asList(new Text("a"), new DoubleWritable(3.1415), new 
DateWritableV2(Date.of(2015, 5, 26)),
+        asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new 
IntWritable(4)));
+
+    Object i2 = asList(new Text("b"), new DoubleWritable(3.14), new 
DateWritableV2(Date.of(2015, 5, 26)),
+        asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new 
IntWritable(4)));
+
+    Object i3 = asList(new Text("a"), new DoubleWritable(3.1415), new 
DateWritableV2(Date.of(2015, 5, 25)),
+        asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new 
IntWritable(5)));
+
+    Object i4 = asList(new Text("a"), new DoubleWritable(3.1415), new 
DateWritableV2(Date.of(2015, 5, 25)),
+        asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new 
IntWritable(4)));
+
+    runAndVerify(asList(i1, null, null, i3, i4, i2), asList(i1, i3, i4, i2));
+  }
+
+  @Test public void testMap() throws HiveException {
+    ObjectInspector[] inputOIs = { 
ObjectInspectorFactory.getStandardListObjectInspector(
+        ObjectInspectorFactory.getStandardMapObjectInspector(
+            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+            PrimitiveObjectInspectorFactory.writableIntObjectInspector)) };
+    udf.initialize(inputOIs);
+
+    Map<Text, IntWritable> m1 = new HashMap<>();
+    m1.put(new Text("a"), new IntWritable(4));
+    m1.put(new Text("b"), new IntWritable(3));
+    m1.put(new Text("c"), new IntWritable(1));
+    m1.put(new Text("d"), new IntWritable(2));
+
+    Map<Text, IntWritable> m2 = new HashMap<>();
+    m2.put(new Text("d"), new IntWritable(4));
+    m2.put(new Text("b"), new IntWritable(3));
+    m2.put(new Text("a"), new IntWritable(1));
+    m2.put(new Text("c"), new IntWritable(2));
+
+    Map<Text, IntWritable> m3 = new HashMap<>();
+    m3.put(new Text("d"), new IntWritable(4));
+    m3.put(new Text("b"), new IntWritable(3));
+    m3.put(new Text("a"), new IntWritable(1));
+
+    runAndVerify(asList(m1, m3, m2, null, null), asList(m1, m3, m2));
+  }
+
+}
diff --git a/ql/src/test/queries/clientnegative/udf_array_compact_1.q b/ql/src/test/queries/clientnegative/udf_array_compact_1.q
new file mode 100644
index 00000000000..fc5fb9e2096
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/udf_array_compact_1.q
@@ -0,0 +1 @@
+SELECT array_compact(3);
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/udf_array_compact.q b/ql/src/test/queries/clientpositive/udf_array_compact.q
new file mode 100644
index 00000000000..f8b6a42aadb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_array_compact.q
@@ -0,0 +1,38 @@
+--! qt:dataset:src
+
+-- SORT_QUERY_RESULTS
+
+set hive.fetch.task.conversion=more;
+
+DESCRIBE FUNCTION array_compact;
+DESCRIBE FUNCTION EXTENDED array_compact;
+
+-- evaluates function for array of primitives
+SELECT array_compact(array(1, 2, 3, null,3,4)) FROM src tablesample (1 rows);
+
+SELECT array_compact(array()) FROM src tablesample (1 rows);
+
+SELECT array_compact(array(null)) FROM src tablesample (1 rows);
+
+SELECT array_compact(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9)) FROM src 
tablesample (1 rows);
+
+SELECT array_compact(array(1.1234567890, 2.234567890, 3.34567890, null, 
3.3456789, 2.234567,1.1234567890)) FROM src tablesample (1 rows);
+
+SELECT array_compact(array(11234567890, 2234567890, 334567890, null, 
11234567890, 2234567890, 334567890, null)) FROM src tablesample (1 rows);
+
+SELECT 
array_compact(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")))
 FROM src tablesample (1 rows);
+
+# handle null array cases
+
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/test_null_array;
+
+dfs -copyFromLocal ../../data/files/test_null_array.csv 
${system:test.tmp.dir}/test_null_array/;
+
+create external table test_null_array (id int, value Array<String>) ROW FORMAT 
DELIMITED
+ FIELDS TERMINATED BY ':' collection items terminated by ',' location 
'${system:test.tmp.dir}/test_null_array';
+
+select value from test_null_array;
+
+select array_compact(value) from test_null_array;
+
+dfs -rm -r ${system:test.tmp.dir}/test_null_array;
\ No newline at end of file
diff --git a/ql/src/test/results/clientnegative/udf_array_compact_1.q.out b/ql/src/test/results/clientnegative/udf_array_compact_1.q.out
new file mode 100644
index 00000000000..30933b86378
--- /dev/null
+++ b/ql/src/test/results/clientnegative/udf_array_compact_1.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10016]: Line 1:21 Argument type mismatch '3': 
"array" expected at function ARRAY_COMPACT, but "int" is found
diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out b/ql/src/test/results/clientpositive/llap/show_functions.q.out
index b764c89e94d..08f85658227 100644
--- a/ql/src/test/results/clientpositive/llap/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out
@@ -47,6 +47,7 @@ and
 approx_distinct
 array
 array_append
+array_compact
 array_contains
 array_distinct
 array_except
@@ -677,6 +678,7 @@ and
 approx_distinct
 array
 array_append
+array_compact
 array_contains
 array_distinct
 array_except
diff --git a/ql/src/test/results/clientpositive/llap/udf_array_compact.q.out b/ql/src/test/results/clientpositive/llap/udf_array_compact.q.out
new file mode 100644
index 00000000000..633e21e30bf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/udf_array_compact.q.out
@@ -0,0 +1,112 @@
+PREHOOK: query: DESCRIBE FUNCTION array_compact
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION array_compact
+POSTHOOK: type: DESCFUNCTION
+array_compact(array) - Removes NULL elements from array.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED array_compact
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED array_compact
+POSTHOOK: type: DESCFUNCTION
+array_compact(array) - Removes NULL elements from array.
+Example:
+  > SELECT array_compact(array(1,NULL,3,NULL,4)) FROM src;
+  [1,3,4]
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayCompact
+Function type:BUILTIN
+PREHOOK: query: SELECT array_compact(array(1, 2, 3, null,3,4)) FROM src 
tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(1, 2, 3, null,3,4)) FROM src 
tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1,2,3,3,4]
+PREHOOK: query: SELECT array_compact(array()) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array()) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[]
+PREHOOK: query: SELECT array_compact(array(null)) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(null)) FROM src tablesample (1 
rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[]
+PREHOOK: query: SELECT array_compact(array(1.12, 2.23, 3.34, 
null,1.11,1.12,2.9)) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(1.12, 2.23, 3.34, 
null,1.11,1.12,2.9)) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1.12,2.23,3.34,1.11,1.12,2.9]
+PREHOOK: query: SELECT array_compact(array(1.1234567890, 2.234567890, 
3.34567890, null, 3.3456789, 2.234567,1.1234567890)) FROM src tablesample (1 
rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(1.1234567890, 2.234567890, 
3.34567890, null, 3.3456789, 2.234567,1.1234567890)) FROM src tablesample (1 
rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1.123456789,2.23456789,3.3456789,3.3456789,2.234567,1.123456789]
+PREHOOK: query: SELECT array_compact(array(11234567890, 2234567890, 334567890, 
null, 11234567890, 2234567890, 334567890, null)) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(11234567890, 2234567890, 
334567890, null, 11234567890, 2234567890, 334567890, null)) FROM src 
tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[11234567890,2234567890,334567890,11234567890,2234567890,334567890]
+PREHOOK: query: SELECT 
array_compact(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")))
 FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT 
array_compact(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")))
 FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[["a","b","c","d"],["a","b","c","d"],["a","b","c","d","e"],["e","a","b","c","d"]]
+PREHOOK: query: create external table test_null_array (id int, value 
Array<String>) ROW FORMAT DELIMITED
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_null_array
+POSTHOOK: query: create external table test_null_array (id int, value 
Array<String>) ROW FORMAT DELIMITED
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_null_array
+PREHOOK: query: select value from test_null_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+POSTHOOK: query: select value from test_null_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+["NULL"]
+["null","null"]
+[]
+PREHOOK: query: select array_compact(value) from test_null_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+POSTHOOK: query: select array_compact(value) from test_null_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+["NULL"]
+["null","null"]
+[]

Reply via email to