HIVE-12230 custom UDF configure() not called in Vectorization mode (Matt McCline, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/95fcdb55
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/95fcdb55
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/95fcdb55

Branch: refs/heads/spark
Commit: 95fcdb55513e4771f7b387f714043870ef41ce66
Parents: d33ddef
Author: Matt McCline <mmccl...@hortonworks.com>
Authored: Thu Nov 5 13:16:14 2015 -0800
Committer: Matt McCline <mmccl...@hortonworks.com>
Committed: Thu Nov 5 13:16:14 2015 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/MapredContext.java      |  2 +-
 .../ql/exec/vector/udf/VectorUDFAdaptor.java    |  5 ++
 .../hive/ql/exec/vector/UDFHelloTest.java       | 69 +++++++++++++++++++
 .../vector_custom_udf_configure.q               | 11 +++
 .../vector_custom_udf_configure.q.out           | 70 ++++++++++++++++++++
 5 files changed, 156 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
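
Background: when vectorization is enabled, a GenericUDF that has no native vectorized implementation is evaluated through VectorUDFAdaptor. Before this patch the adaptor never invoked GenericUDF.configure(MapredContext), so any UDF that depends on configure(), for example to pick up a value from the task's JobConf, silently ran unconfigured in vectorized mode while the row-mode path configured it correctly. A minimal sketch of such a UDF is below; the class name and the "greeting.prefix" property key are hypothetical and not part of this patch:

    import org.apache.hadoop.hive.ql.exec.MapredContext;
    import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
    import org.apache.hadoop.hive.ql.metadata.HiveException;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

    // Hypothetical example, not from this patch: a UDF whose output depends on configure().
    public class UDFConfiguredGreeting extends GenericUDF {
      private String prefix = "";  // stays empty if configure() is never called

      @Override
      public void configure(MapredContext context) {
        // "greeting.prefix" is an invented property key used only for illustration.
        prefix = context.getJobConf().get("greeting.prefix", "Hello ");
      }

      @Override
      public ObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
      }

      @Override
      public Object evaluate(DeferredObject[] args) throws HiveException {
        // Naive conversion for brevity; real UDFs should use ObjectInspectorConverters,
        // as UDFHelloTest does below.
        Object value = args[0].get();
        return value == null ? null : prefix + value.toString();
      }

      @Override
      public String getDisplayString(String[] children) {
        return "configured_greeting(" + children[0] + ")";
      }
    }

With the old code a UDF like this returned unprefixed strings whenever the query went through VectorUDFAdaptor, which is exactly the behavior the new qtest guards against.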


http://git-wip-us.apache.org/repos/asf/hive/blob/95fcdb55/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
index 6ce84ac..b7ed0c1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MapredContext.java
@@ -116,7 +116,7 @@ public class MapredContext {
     udfs.clear();
   }
 
-  void setup(GenericUDF genericUDF) {
+  public void setup(GenericUDF genericUDF) {
     if (needConfigure(genericUDF)) {
       genericUDF.configure(this);
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/95fcdb55/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
index b397398..d3a0f9f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/udf/VectorUDFAdaptor.java
@@ -21,6 +21,7 @@ import java.sql.Date;
 import java.sql.Timestamp;
 
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.MapredContext;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.vector.*;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
@@ -84,6 +85,10 @@ public class VectorUDFAdaptor extends VectorExpression {
     for (int i = 0; i < childrenOIs.length; i++) {
       childrenOIs[i] = writers[i].getObjectInspector();
     }
+    MapredContext context = MapredContext.get();
+    if (context != null) {
+      context.setup(genericUDF);
+    }
     outputOI = VectorExpressionWriterFactory.genVectorExpressionWritable(expr)
         .getObjectInspector();
 

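Two details of this hunk are worth noting. MapredContext.setup() had to become public (previous hunk) because VectorUDFAdaptor lives in org.apache.hadoop.hive.ql.exec.vector.udf, outside the org.apache.hadoop.hive.ql.exec package that could reach the package-private method. The null check is needed because MapredContext.get() reads a thread-local that is only populated inside a running task (ExecMapper/ExecReducer or the Tez processors); at compile time or in fetch-only execution there is no context to hand to the UDF, so the adaptor simply skips the call, matching the row-mode path. A rough sketch of the wiring, assuming the usual task-side initialization (not code from this patch; jobConf and genericUDF stand for the task's JobConf and the wrapped UDF):

    // Task setup (e.g. ExecMapper) populates the thread-local context.
    MapredContext ctx = MapredContext.init(true /* isMap */, jobConf);

    // VectorUDFAdaptor.init() now picks it up and configures the wrapped UDF:
    MapredContext context = MapredContext.get();
    if (context != null) {
      context.setup(genericUDF);   // calls genericUDF.configure(context) when the UDF overrides it
    }

    // Task teardown clears the thread-local again.
    MapredContext.close();
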
http://git-wip-us.apache.org/repos/asf/hive/blob/95fcdb55/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/UDFHelloTest.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/UDFHelloTest.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/UDFHelloTest.java
new file mode 100644
index 0000000..48fb59a
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/UDFHelloTest.java
@@ -0,0 +1,69 @@
+package org.apache.hadoop.hive.ql.exec.vector;
+
+import org.apache.hadoop.hive.ql.exec.MapredContext;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * UDF to obfuscate input data appending "Hello "
+ */
+public class UDFHelloTest extends GenericUDF {
+  private static final Logger LOG = LoggerFactory.getLogger(UDFHelloTest.class);
+
+  private Text result = new Text();
+
+  private static String greeting = "";
+
+  private ObjectInspectorConverters.Converter[] converters;
+
+  @Override
+  public Object evaluate(DeferredObject[] arg0) throws HiveException {
+
+    if (arg0.length != 1) {
+      LOG.error("UDFHelloTest expects exactly 1 argument");
+      throw new HiveException("UDFHelloTest expects exactly 1 argument");
+    }
+
+    if (arg0[0].get() == null) {
+      LOG.warn("Empty input");
+      return null;
+    }
+
+    Text data = (Text) converters[0].convert(arg0[0].get());
+
+    String dataString = data.toString();
+
+    result.set(greeting + dataString);
+
+    return result;
+  }
+
+  @Override
+  public String getDisplayString(String[] arg0) {
+    return "Hello...";
+  }
+
+  @Override
+  public void configure(MapredContext context) {
+    greeting = "Hello ";
+  }
+
+  @Override
+  public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException {
+    converters = new ObjectInspectorConverters.Converter[arg0.length];
+    for (int i = 0; i < arg0.length; i++) {
+      converters[i] = ObjectInspectorConverters.getConverter(arg0[i],
+              PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+    }
+
+    // evaluate will return a Text object
+    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
+  }
+}
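
The test UDF is deliberately simple: greeting is a static field that stays empty unless configure() runs, so the query output is prefixed with "Hello " only when the vectorized path actually invokes configure(); before this patch a vectorized run would have returned bare "a1"/"a2". A rough, hypothetical sketch of the same check done directly against the UDF (not part of this patch; it leans on the now-public setup() and on MapredContext.init() as used by the task runtime):

    import org.apache.hadoop.hive.ql.exec.MapredContext;
    import org.apache.hadoop.hive.ql.exec.vector.UDFHelloTest;
    import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
    import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
    import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.JobConf;

    public class UDFHelloTestConfigureCheck {
      public static void main(String[] args) throws Exception {
        UDFHelloTest udf = new UDFHelloTest();
        udf.initialize(new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.writableStringObjectInspector });

        // Simulate what a task does before operators run, then what
        // VectorUDFAdaptor.init() now does for the wrapped UDF.
        MapredContext ctx = MapredContext.init(true, new JobConf());
        ctx.setup(udf);

        Object out = udf.evaluate(new GenericUDF.DeferredObject[] {
            new GenericUDF.DeferredJavaObject(new Text("a1")) });
        System.out.println(out);   // expected "Hello a1"; without setup() it would be "a1"

        MapredContext.close();
      }
    }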

http://git-wip-us.apache.org/repos/asf/hive/blob/95fcdb55/ql/src/test/queries/clientpositive/vector_custom_udf_configure.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_custom_udf_configure.q b/ql/src/test/queries/clientpositive/vector_custom_udf_configure.q
new file mode 100644
index 0000000..eb19f3a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_custom_udf_configure.q
@@ -0,0 +1,11 @@
+set hive.fetch.task.conversion=none;
+
+create temporary function UDFHelloTest as 'org.apache.hadoop.hive.ql.exec.vector.UDFHelloTest';
+
+create table testorc1(id int, name string) stored as orc;
+insert into table testorc1 values(1, 'a1'), (2,'a2');
+ 
+set hive.vectorized.execution.enabled=true;
+explain
+select id, UDFHelloTest(name) from testorc1;
+select id, UDFHelloTest(name) from testorc1;
\ No newline at end of file
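
The two settings in this test matter: hive.fetch.task.conversion=none forces the SELECT to run as a real map task (fetch-only execution never initializes a MapredContext for the adaptor to find), and hive.vectorized.execution.enabled=true together with the ORC table makes the Select operator run vectorized, so UDFHelloTest is evaluated through VectorUDFAdaptor rather than the row-mode path. The "Hello a1"/"Hello a2" rows in the .q.out below therefore demonstrate that configure() was invoked.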

http://git-wip-us.apache.org/repos/asf/hive/blob/95fcdb55/ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out b/ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out
new file mode 100644
index 0000000..d529873
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_custom_udf_configure.q.out
@@ -0,0 +1,70 @@
+PREHOOK: query: create temporary function UDFHelloTest as 'org.apache.hadoop.hive.ql.exec.vector.UDFHelloTest'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: udfhellotest
+POSTHOOK: query: create temporary function UDFHelloTest as 'org.apache.hadoop.hive.ql.exec.vector.UDFHelloTest'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: udfhellotest
+PREHOOK: query: create table testorc1(id int, name string) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testorc1
+POSTHOOK: query: create table testorc1(id int, name string) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testorc1
+PREHOOK: query: insert into table testorc1 values(1, 'a1'), (2,'a2')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@testorc1
+POSTHOOK: query: insert into table testorc1 values(1, 'a1'), (2,'a2')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@testorc1
+POSTHOOK: Lineage: testorc1.id EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: testorc1.name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: explain
+select id, UDFHelloTest(name) from testorc1
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select id, UDFHelloTest(name) from testorc1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: testorc1
+            Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: id (type: int), Hello... (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select id, UDFHelloTest(name) from testorc1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testorc1
+#### A masked pattern was here ####
+POSTHOOK: query: select id, UDFHelloTest(name) from testorc1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testorc1
+#### A masked pattern was here ####
+1      Hello a1
+2      Hello a2
