HIVE-14768: Add a new UDTF Replicate_Rows (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/e19f0e35
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/e19f0e35
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/e19f0e35
Branch: refs/heads/repl2
Commit: e19f0e35e09ca283e5de46ae7e2db1e11396335e
Parents: 0a4b3d8
Author: Pengcheng Xiong <pxi...@apache.org>
Authored: Mon Oct 3 22:07:24 2016 -0700
Committer: Pengcheng Xiong <pxi...@apache.org>
Committed: Mon Oct 3 22:07:24 2016 -0700
----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |   1 +
 .../udf/generic/GenericUDTFReplicateRows.java   |  88 +++++++++++++++
 .../clientpositive/udtf_replicate_rows.q        |  23 ++++
 .../results/clientpositive/show_functions.q.out |   1 +
 .../clientpositive/udtf_replicate_rows.q.out    | 107 +++++++++++++++++++
 5 files changed, 220 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 6870dfa..6b29be1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -468,6 +468,7 @@ public final class FunctionRegistry {
 
     // Generic UDTF's
     system.registerGenericUDTF("explode", GenericUDTFExplode.class);
+    system.registerGenericUDTF("replicate_rows", GenericUDTFReplicateRows.class);
     system.registerGenericUDTF("inline", GenericUDTFInline.class);
     system.registerGenericUDTF("json_tuple", GenericUDTFJSONTuple.class);
     system.registerGenericUDTF("parse_url_tuple", GenericUDTFParseUrlTuple.class);
http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
new file mode 100644
index 0000000..164445d
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDTFReplicateRows.java
@@ -0,0 +1,88 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.udf.generic; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.hive.ql.exec.Description; +import org.apache.hadoop.hive.ql.exec.UDFArgumentException; +import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver; +import org.apache.hadoop.hive.serde2.lazy.LazyLong; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.io.LongWritable; + +/** + * Takes a row of data and repeats n times. + */ +@Description(name = "replicate_rows", value = "_FUNC_(n, cols...) 
- turns 1 row into n rows") +public class GenericUDTFReplicateRows extends GenericUDTF { + @Override + public void close() throws HiveException { + } + + private transient List<ObjectInspector> argOIs = new ArrayList<ObjectInspector>(); + + @Override + public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { + if (args.length < 2) { + throw new UDFArgumentException("UDTFReplicateRows() expects at least two arguments."); + } + if (!(args[0] instanceof LongObjectInspector)) { + throw new UDFArgumentException( + "The first argument to UDTFReplicateRows() must be a long (got " + + args[0].getTypeName() + " instead)."); + } + + ArrayList<String> fieldNames = new ArrayList<String>(); + ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(); + for (int index = 0; index < args.length; ++index) { + fieldNames.add("col" + index); + fieldOIs.add(args[index]); + } + argOIs = fieldOIs; + return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); + } + + @Override + public void process(Object[] args) throws HiveException, UDFArgumentException { + + long numRows = ((LongObjectInspector) argOIs.get(0)).get(args[0]); + + for (long n = 0; n < numRows; n++) { + forward(args); + } + } + + @Override + public String toString() { + return "UDTFReplicateRows"; + } + +} http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/queries/clientpositive/udtf_replicate_rows.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/udtf_replicate_rows.q b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q new file mode 100644 index 0000000..a074a78 --- /dev/null +++ b/ql/src/test/queries/clientpositive/udtf_replicate_rows.q @@ -0,0 +1,23 @@ +set hive.mapred.mode=nonstrict; +set hive.cbo.enable=false; + +DESCRIBE FUNCTION replicate_rows; +DESCRIBE FUNCTION EXTENDED replicate_rows; + +create table t (x bigint, y string, z int); + +insert 
into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3); + +SELECT replicate_rows(x,y) FROM t; + +SELECT replicate_rows(x,y,y) FROM t; + +SELECT replicate_rows(x,y,y,y,z) FROM t; + +select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq; + +select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq; + +SELECT replicate_rows(x,concat(y,'...'),y) FROM t; + + http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/show_functions.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out index 4a40094..7b746ff 100644 --- a/ql/src/test/results/clientpositive/show_functions.q.out +++ b/ql/src/test/results/clientpositive/show_functions.q.out @@ -186,6 +186,7 @@ regexp_extract regexp_replace repeat replace +replicate_rows reverse rlike round http://git-wip-us.apache.org/repos/asf/hive/blob/e19f0e35/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out new file mode 100644 index 0000000..f76a584 --- /dev/null +++ b/ql/src/test/results/clientpositive/udtf_replicate_rows.q.out @@ -0,0 +1,107 @@ +PREHOOK: query: DESCRIBE FUNCTION replicate_rows +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION replicate_rows +POSTHOOK: type: DESCFUNCTION +replicate_rows(n, cols...) - turns 1 row into n rows +PREHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows +PREHOOK: type: DESCFUNCTION +POSTHOOK: query: DESCRIBE FUNCTION EXTENDED replicate_rows +POSTHOOK: type: DESCFUNCTION +replicate_rows(n, cols...) 
- turns 1 row into n rows +PREHOOK: query: create table t (x bigint, y string, z int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@t +POSTHOOK: query: create table t (x bigint, y string, z int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@t +PREHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3) +PREHOOK: type: QUERY +PREHOOK: Input: default@values__tmp__table__1 +PREHOOK: Output: default@t +POSTHOOK: query: insert into table t values (3,'2',0),(2,'3',1),(0,'2',2),(-1,'k',3) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@values__tmp__table__1 +POSTHOOK: Output: default@t +POSTHOOK: Lineage: t.x EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: t.y SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +POSTHOOK: Lineage: t.z EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +PREHOOK: query: SELECT replicate_rows(x,y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 +3 2 +3 2 +2 3 +2 3 +PREHOOK: query: SELECT replicate_rows(x,y,y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,y,y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 2 +3 2 2 +3 2 2 +2 3 3 +2 3 3 +PREHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,y,y,y,z) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A 
masked pattern was here #### +3 2 2 2 0 +3 2 2 2 0 +3 2 2 2 0 +2 3 3 3 1 +2 3 3 3 1 +PREHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select y,x from (SELECT replicate_rows(x,y) as (x,y) FROM t)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +2 3 +2 3 +2 3 +3 2 +3 2 +PREHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: select z,y,x from(SELECT replicate_rows(x,y,y) as (z,y,x) FROM t)subq +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2 2 +3 2 2 +3 2 2 +2 3 3 +2 3 3 +PREHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t +PREHOOK: type: QUERY +PREHOOK: Input: default@t +#### A masked pattern was here #### +POSTHOOK: query: SELECT replicate_rows(x,concat(y,'...'),y) FROM t +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t +#### A masked pattern was here #### +3 2... 2 +3 2... 2 +3 2... 2 +2 3... 3 +2 3... 3