This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3d7d82a1c0a HIVE-25043: Addendum: Support custom UDF in Vectorized
mode (#5703)
3d7d82a1c0a is described below
commit 3d7d82a1c0a53701c233e83572e8094f73625a8b
Author: Ryu Kobayashi <[email protected]>
AuthorDate: Fri May 30 00:05:32 2025 +0900
HIVE-25043: Addendum: Support custom UDF in Vectorized mode (#5703)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 6 +-
data/conf/llap-udfs.lst | 1 +
.../test/resources/testconfiguration.properties | 1 +
.../ql/udf/generic/GenericUDFCustomDateSub.java | 55 +++++
.../hive/ql/exec/vector/VectorizationContext.java | 19 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 38 ++++
.../clientpositive/custom_udf_vectorization.q | 24 +++
.../llap/custom_udf_vectorization.q.out | 238 +++++++++++++++++++++
8 files changed, 362 insertions(+), 20 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8ab998f2cb9..edb42576c84 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4702,9 +4702,9 @@ public static enum ConfVars {
"The default value is true."),
HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST("hive.vectorized.adaptor.custom.udf.whitelist",
"",
- "Custom UDF allowed when hive.vectorized.adaptor.usage.mode is
chosen.\n" +
- "Specify classes separated by commas:\n" +
- "package.FooClass,package.BarClass"),
+ "A comma-separated list of custom UDFs allowed to operate in
vectorized mode " +
+ "when hive.vectorized.adaptor.usage.mode is set to chosen.\n" +
+ "Only Generic UDFs are supported for whitelisting; ensure that each
custom UDF class extends GenericUDF"),
HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count",
25,
"Maximum number of vectorized row batches to buffer in memory for
PTF\n" +
diff --git a/data/conf/llap-udfs.lst b/data/conf/llap-udfs.lst
new file mode 100644
index 00000000000..e603400122a
--- /dev/null
+++ b/data/conf/llap-udfs.lst
@@ -0,0 +1 @@
+org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub
diff --git a/itests/src/test/resources/testconfiguration.properties
b/itests/src/test/resources/testconfiguration.properties
index f08d0721895..ceea0e477a1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -71,6 +71,7 @@ minillap.query.files=\
cte_2.q,\
cte_4.q,\
cttl.q,\
+ custom_udf_vectorization.q,\
dynamic_partition_pruning_2.q,\
dynamic_semijoin_user_level.q,\
dynpart_cast.q,\
diff --git
a/itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCustomDateSub.java
b/itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCustomDateSub.java
new file mode 100644
index 00000000000..59624657c43
--- /dev/null
+++
b/itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCustomDateSub.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColScalar;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubScalarCol;
+
+/**
+ * GenericUDFCustomDateSub.
+ *
+ * Subtract a number of days from the date. The time part of the string will be
+ * ignored.
+ *
+ * NOTE: This is a subset of what MySQL offers as:
+ * http://dev.mysql.com/doc/refman
+ * /5.1/en/date-and-time-functions.html#function_date-sub
+ *
+ */
+@Description(name = "date_sub",
+ value = "_FUNC_(start_date, num_days) - Returns the date that is num_days
before start_date.",
+ extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or"
+ + " 'yyyy-MM-dd'. num_days is a number. The time part of start_date is
"
+ + "ignored.\n"
+ + "Example:\n "
+ + " > SELECT _FUNC_('2009-07-30', 1) FROM src LIMIT 1;\n"
+ + " '2009-07-29'")
+@VectorizedExpressions({VectorUDFDateSubColScalar.class,
VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class})
+public class GenericUDFCustomDateSub extends GenericUDFDateAdd {
+ public GenericUDFCustomDateSub() {
+ this.signModifier = -1;
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ return getStandardDisplayString("date_sub", children);
+ }
+}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index a20d400894a..e56cbb3cec7 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -277,18 +277,6 @@ public static HiveVectorIfStmtMode
getHiveConfValue(HiveConf hiveConf) {
private HiveVectorIfStmtMode hiveVectorIfStmtMode;
- private Set<String> allowedCustomUDFs;
-
- private Set<String> getAllowedCustomUDFs(HiveConf hiveConf) {
- String udfs = HiveConf.getVar(hiveConf,
- HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
- if (udfs != null && !udfs.isEmpty()) {
- return new HashSet<>(Arrays.asList(udfs.split(",")));
- }
-
- return new HashSet<>();
- }
-
//when set to true use the overflow checked vector expressions
private boolean useCheckedVectorExpressions;
@@ -310,7 +298,6 @@ private void setHiveConfVars(HiveConf hiveConf) {
adaptorSuppressEvaluateExceptions =
HiveConf.getBoolVar(
hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS);
- this.allowedCustomUDFs = getAllowedCustomUDFs(hiveConf);
}
private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
@@ -1050,7 +1037,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc
exprDesc, VectorExpress
"Could not vectorize expression (mode = " + mode.name() + "):
" + exprDesc.toString()
+ " because hive.vectorized.adaptor.usage.mode=none");
case CHOSEN:
- if (isNonVectorizedPathUDF(expr, mode, allowedCustomUDFs)) {
+ if (isNonVectorizedPathUDF(expr, mode)) {
ve = getCustomUDFExpression(expr, mode);
} else {
throw new HiveException(
@@ -1460,7 +1447,7 @@ public static GenericUDF getGenericUDFForCast(TypeInfo
castType) throws HiveExce
* may be implemented in the future with an optimized VectorExpression.
*/
private static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
- VectorExpressionDescriptor.Mode mode, Set<String> allowCustomUDFs) {
+ VectorExpressionDescriptor.Mode mode) {
GenericUDF gudf = expr.getGenericUDF();
if (gudf instanceof GenericUDFBridge) {
GenericUDFBridge bridge = (GenericUDFBridge) gudf;
@@ -1498,8 +1485,6 @@ private static boolean
isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
return true;
} else if (gudf instanceof GenericUDFConcat && (mode ==
VectorExpressionDescriptor.Mode.PROJECTION)) {
return true;
- } else if (allowCustomUDFs.contains(gudf.getClass().getName())) {
- return true;
}
return false;
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 8a7fc128737..571d3bcafbd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -178,6 +178,7 @@
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.UDFAcos;
import org.apache.hadoop.hive.ql.udf.UDFAsin;
import org.apache.hadoop.hive.ql.udf.UDFAtan;
@@ -550,6 +551,9 @@ public Vectorizer() {
// For conditional expressions
supportedGenericUDFs.add(GenericUDFIf.class);
+
+ // Add user custom UDFs
+ addCustomUDFs(SessionState.getSessionConf());
}
private class VectorTaskColumnInfo {
@@ -2411,6 +2415,40 @@ private boolean getOnlyStructObjectInspectors(ReduceWork
reduceWork,
}
}
+ private void addCustomUDFs(HiveConf hiveConf) {
+ if (hiveConf == null) {
+ return;
+ }
+
+ if (HiveVectorAdaptorUsageMode.CHOSEN !=
+ HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf)) {
+ return;
+ }
+
+ String[] udfs =
+ HiveConf.getTrimmedStringsVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
+ if (udfs == null) {
+ return;
+ }
+
+ ClassLoader loader = Utilities.getSessionSpecifiedClassLoader();
+
+ for (String udf : udfs) {
+ try {
+ Class<?> cls = Class.forName(udf, true, loader);
+ if (GenericUDF.class.isAssignableFrom(cls)) {
+ supportedGenericUDFs.add(cls);
+ LOG.info("Registered custom UDF: {}", udf);
+ } else {
+ LOG.warn("{} must inherit from the GenericUDF", udf);
+ }
+ } catch (ClassNotFoundException e) {
+ LOG.warn("Failed to register custom UDF: {}", udf, e);
+ }
+ }
+ }
+
@Override
public PhysicalContext resolve(PhysicalContext physicalContext) throws
SemanticException {
diff --git a/ql/src/test/queries/clientpositive/custom_udf_vectorization.q
b/ql/src/test/queries/clientpositive/custom_udf_vectorization.q
new file mode 100644
index 00000000000..50006b449f4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/custom_udf_vectorization.q
@@ -0,0 +1,24 @@
+--! qt:dataset:src
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.adaptor.usage.mode=chosen;
+set hive.llap.execution.mode=auto;
+
+CREATE TEMPORARY FUNCTION CDATE_SUB AS
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub';
+
+set
hive.vectorized.adaptor.custom.udf.whitelist=org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub;
+
+EXPLAIN VECTORIZATION
+SELECT CDATE_SUB('2000-01-01', 1) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key IS NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key Is NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key <> '' AND value <> ''
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key <> '' AND
value <> '';
+
diff --git
a/ql/src/test/results/clientpositive/llap/custom_udf_vectorization.q.out
b/ql/src/test/results/clientpositive/llap/custom_udf_vectorization.q.out
new file mode 100644
index 00000000000..e9d4b615460
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/custom_udf_vectorization.q.out
@@ -0,0 +1,238 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION CDATE_SUB AS
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: cdate_sub
+POSTHOOK: query: CREATE TEMPORARY FUNCTION CDATE_SUB AS
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: cdate_sub
+PREHOOK: query: EXPLAIN VECTORIZATION
+SELECT CDATE_SUB('2000-01-01', 1) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key IS NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key Is NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key <> '' AND value <> ''
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key <> '' AND
value <> ''
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: EXPLAIN VECTORIZATION
+SELECT CDATE_SUB('2000-01-01', 1) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key IS NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key Is NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key <> '' AND value <> ''
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key <> '' AND
value <> ''
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+ Map 5 <- Union 2 (CONTAINS)
+ Map 6 <- Union 2 (CONTAINS)
+ Map 7 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: DATE'1999-12-31' (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_sub('2000-01-01', UDFToInteger(key))
(type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: DATE'1999-12-31' (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_sub('2000-01-01', UDFToInteger(key))
(type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: DATE'1999-12-31' (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_sub('2000-01-01', UDFToInteger(key))
(type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+