This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 3d7d82a1c0a HIVE-25043: Addendum: Support custom UDF in Vectorized
mode (#5703)
3d7d82a1c0a is described below
commit 3d7d82a1c0a53701c233e83572e8094f73625a8b
Author: Ryu Kobayashi <[email protected]>
AuthorDate: Fri May 30 00:05:32 2025 +0900
HIVE-25043: Addendum: Support custom UDF in Vectorized mode (#5703)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 6 +-
data/conf/llap-udfs.lst | 1 +
.../test/resources/testconfiguration.properties | 1 +
.../ql/udf/generic/GenericUDFCustomDateSub.java | 55 +++++
.../hive/ql/exec/vector/VectorizationContext.java | 19 +-
.../hive/ql/optimizer/physical/Vectorizer.java | 38 ++++
.../clientpositive/custom_udf_vectorization.q | 24 +++
.../llap/custom_udf_vectorization.q.out | 238 +++++++++++++++++++++
8 files changed, 362 insertions(+), 20 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8ab998f2cb9..edb42576c84 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4702,9 +4702,9 @@ public static enum ConfVars {
"The default value is true."),
HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST("hive.vectorized.adaptor.custom.udf.whitelist",
"",
- "Custom UDF allowed when hive.vectorized.adaptor.usage.mode is
chosen.\n" +
- "Specify classes separated by commas:\n" +
- "package.FooClass,package.BarClass"),
+ "A comma-separated list of custom UDFs allowed to operate in
vectorized mode " +
+ "when hive.vectorized.adaptor.usage.mode is set to chosen.\n" +
+ "Only Generic UDFs are supported for whitelisting; ensure that each
custom UDF class extends GenericUDF"),
HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count",
25,
"Maximum number of vectorized row batches to buffer in memory for
PTF\n" +
diff --git a/data/conf/llap-udfs.lst b/data/conf/llap-udfs.lst
new file mode 100644
index 00000000000..e603400122a
--- /dev/null
+++ b/data/conf/llap-udfs.lst
@@ -0,0 +1 @@
+org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub
diff --git a/itests/src/test/resources/testconfiguration.properties
b/itests/src/test/resources/testconfiguration.properties
index f08d0721895..ceea0e477a1 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -71,6 +71,7 @@ minillap.query.files=\
cte_2.q,\
cte_4.q,\
cttl.q,\
+ custom_udf_vectorization.q,\
dynamic_partition_pruning_2.q,\
dynamic_semijoin_user_level.q,\
dynpart_cast.q,\
diff --git
a/itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCustomDateSub.java
b/itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCustomDateSub.java
new file mode 100644
index 00000000000..59624657c43
--- /dev/null
+++
b/itests/util/src/main/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCustomDateSub.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColCol;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubColScalar;
+import
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorUDFDateSubScalarCol;
+
+/**
+ * GenericUDFCustomDateSub.
+ *
+ * Subtract a number of days from the date. The time part of the string will be
+ * ignored.
+ *
+ * NOTE: This is a subset of what MySQL offers as:
+ * http://dev.mysql.com/doc/refman
+ * /5.1/en/date-and-time-functions.html#function_date-sub
+ *
+ */
+@Description(name = "date_sub",
+ value = "_FUNC_(start_date, num_days) - Returns the date that is num_days
before start_date.",
+ extended = "start_date is a string in the format 'yyyy-MM-dd HH:mm:ss' or"
+ + " 'yyyy-MM-dd'. num_days is a number. The time part of start_date is
"
+ + "ignored.\n"
+ + "Example:\n "
+ + " > SELECT _FUNC_('2009-07-30', 1) FROM src LIMIT 1;\n"
+ + " '2009-07-29'")
+@VectorizedExpressions({VectorUDFDateSubColScalar.class,
VectorUDFDateSubScalarCol.class, VectorUDFDateSubColCol.class})
+public class GenericUDFCustomDateSub extends GenericUDFDateAdd {
+ public GenericUDFCustomDateSub() {
+ this.signModifier = -1;
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ return getStandardDisplayString("date_sub", children);
+ }
+}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index a20d400894a..e56cbb3cec7 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -277,18 +277,6 @@ public static HiveVectorIfStmtMode
getHiveConfValue(HiveConf hiveConf) {
private HiveVectorIfStmtMode hiveVectorIfStmtMode;
- private Set<String> allowedCustomUDFs;
-
- private Set<String> getAllowedCustomUDFs(HiveConf hiveConf) {
- String udfs = HiveConf.getVar(hiveConf,
- HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
- if (udfs != null && !udfs.isEmpty()) {
- return new HashSet<>(Arrays.asList(udfs.split(",")));
- }
-
- return new HashSet<>();
- }
-
//when set to true use the overflow checked vector expressions
private boolean useCheckedVectorExpressions;
@@ -310,7 +298,6 @@ private void setHiveConfVars(HiveConf hiveConf) {
adaptorSuppressEvaluateExceptions =
HiveConf.getBoolVar(
hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS);
- this.allowedCustomUDFs = getAllowedCustomUDFs(hiveConf);
}
private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
@@ -1050,7 +1037,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc
exprDesc, VectorExpress
"Could not vectorize expression (mode = " + mode.name() + "):
" + exprDesc.toString()
+ " because hive.vectorized.adaptor.usage.mode=none");
case CHOSEN:
- if (isNonVectorizedPathUDF(expr, mode, allowedCustomUDFs)) {
+ if (isNonVectorizedPathUDF(expr, mode)) {
ve = getCustomUDFExpression(expr, mode);
} else {
throw new HiveException(
@@ -1460,7 +1447,7 @@ public static GenericUDF getGenericUDFForCast(TypeInfo
castType) throws HiveExce
* may be implemented in the future with an optimized VectorExpression.
*/
private static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
- VectorExpressionDescriptor.Mode mode, Set<String> allowCustomUDFs) {
+ VectorExpressionDescriptor.Mode mode) {
GenericUDF gudf = expr.getGenericUDF();
if (gudf instanceof GenericUDFBridge) {
GenericUDFBridge bridge = (GenericUDFBridge) gudf;
@@ -1498,8 +1485,6 @@ private static boolean
isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
return true;
} else if (gudf instanceof GenericUDFConcat && (mode ==
VectorExpressionDescriptor.Mode.PROJECTION)) {
return true;
- } else if (allowCustomUDFs.contains(gudf.getClass().getName())) {
- return true;
}
return false;
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index 8a7fc128737..571d3bcafbd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -178,6 +178,7 @@
import org.apache.hadoop.hive.ql.plan.ptf.WindowFrameDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef;
import org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef;
+import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.udf.UDFAcos;
import org.apache.hadoop.hive.ql.udf.UDFAsin;
import org.apache.hadoop.hive.ql.udf.UDFAtan;
@@ -550,6 +551,9 @@ public Vectorizer() {
// For conditional expressions
supportedGenericUDFs.add(GenericUDFIf.class);
+
+ // Add user custom UDFs
+ addCustomUDFs(SessionState.getSessionConf());
}
private class VectorTaskColumnInfo {
@@ -2411,6 +2415,40 @@ private boolean getOnlyStructObjectInspectors(ReduceWork
reduceWork,
}
}
+ private void addCustomUDFs(HiveConf hiveConf) {
+ if (hiveConf == null) {
+ return;
+ }
+
+ if (HiveVectorAdaptorUsageMode.CHOSEN !=
+ HiveVectorAdaptorUsageMode.getHiveConfValue(hiveConf)) {
+ return;
+ }
+
+ String[] udfs =
+ HiveConf.getTrimmedStringsVar(hiveConf,
+ HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
+ if (udfs == null) {
+ return;
+ }
+
+ ClassLoader loader = Utilities.getSessionSpecifiedClassLoader();
+
+ for (String udf : udfs) {
+ try {
+ Class<?> cls = Class.forName(udf, true, loader);
+ if (GenericUDF.class.isAssignableFrom(cls)) {
+ supportedGenericUDFs.add(cls);
+ LOG.info("Registered custom UDF: {}", udf);
+ } else {
+ LOG.warn("{} must inherit from the GenericUDF", udf);
+ }
+ } catch (ClassNotFoundException e) {
+ LOG.warn("Failed to register custom UDF: {}", udf, e);
+ }
+ }
+ }
+
@Override
public PhysicalContext resolve(PhysicalContext physicalContext) throws
SemanticException {
diff --git a/ql/src/test/queries/clientpositive/custom_udf_vectorization.q
b/ql/src/test/queries/clientpositive/custom_udf_vectorization.q
new file mode 100644
index 00000000000..50006b449f4
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/custom_udf_vectorization.q
@@ -0,0 +1,24 @@
+--! qt:dataset:src
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.adaptor.usage.mode=chosen;
+set hive.llap.execution.mode=auto;
+
+CREATE TEMPORARY FUNCTION CDATE_SUB AS
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub';
+
+set
hive.vectorized.adaptor.custom.udf.whitelist=org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub;
+
+EXPLAIN VECTORIZATION
+SELECT CDATE_SUB('2000-01-01', 1) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key IS NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key Is NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key <> '' AND value <> ''
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key <> '' AND
value <> '';
+
diff --git
a/ql/src/test/results/clientpositive/llap/custom_udf_vectorization.q.out
b/ql/src/test/results/clientpositive/llap/custom_udf_vectorization.q.out
new file mode 100644
index 00000000000..e9d4b615460
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/custom_udf_vectorization.q.out
@@ -0,0 +1,238 @@
+PREHOOK: query: CREATE TEMPORARY FUNCTION CDATE_SUB AS
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub'
+PREHOOK: type: CREATEFUNCTION
+PREHOOK: Output: cdate_sub
+POSTHOOK: query: CREATE TEMPORARY FUNCTION CDATE_SUB AS
'org.apache.hadoop.hive.ql.udf.generic.GenericUDFCustomDateSub'
+POSTHOOK: type: CREATEFUNCTION
+POSTHOOK: Output: cdate_sub
+PREHOOK: query: EXPLAIN VECTORIZATION
+SELECT CDATE_SUB('2000-01-01', 1) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key IS NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key Is NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key <> '' AND value <> ''
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key <> '' AND
value <> ''
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: EXPLAIN VECTORIZATION
+SELECT CDATE_SUB('2000-01-01', 1) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key IS NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key Is NOT NULL
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', 1) FROM src WHERE key <> '' AND value <> ''
+UNION ALL
+SELECT CDATE_SUB('2000-01-01', CAST(key as int)) FROM src WHERE key <> '' AND
value <> ''
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Map 1 <- Union 2 (CONTAINS)
+ Map 3 <- Union 2 (CONTAINS)
+ Map 4 <- Union 2 (CONTAINS)
+ Map 5 <- Union 2 (CONTAINS)
+ Map 6 <- Union 2 (CONTAINS)
+ Map 7 <- Union 2 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: DATE'1999-12-31' (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_sub('2000-01-01', UDFToInteger(key))
(type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: DATE'1999-12-31' (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 5
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 43500 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_sub('2000-01-01', UDFToInteger(key))
(type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: DATE'1999-12-31' (type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 7
+ Map Operator Tree:
+ TableScan
+ alias: src
+ filterExpr: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: ((value <> '') and (key <> '')) (type: boolean)
+ Statistics: Num rows: 500 Data size: 89000 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: date_sub('2000-01-01', UDFToInteger(key))
(type: date)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 28000 Basic stats:
COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 3000 Data size: 168000 Basic
stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Execution mode: vectorized
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+