This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new da272b45f77 HIVE-25043: Support custom UDFs in Vectorized mode (Ryu
Kobayashi, reviewed by Denys Kuzmenko)
da272b45f77 is described below
commit da272b45f7707c41713dbfd50d61f0ff285d099f
Author: Ryu Kobayashi <[email protected]>
AuthorDate: Wed Feb 5 05:47:15 2025 +0900
HIVE-25043: Support custom UDFs in Vectorized mode (Ryu Kobayashi, reviewed
by Denys Kuzmenko)
Closes #5631
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 5 +++++
.../hive/ql/exec/vector/VectorizationContext.java | 21 ++++++++++++++++++---
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 4581dce12cf..d89f1922d2a 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -4704,6 +4704,11 @@ public static enum ConfVars {
"This flag should be set to true to enable vectorized mode of the PTF
of query execution.\n" +
"The default value is true."),
+
HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST("hive.vectorized.adaptor.custom.udf.whitelist",
"",
+ "Custom UDF allowed when hive.vectorized.adaptor.usage.mode is
chosen.\n" +
+ "Specify classes separated by commas:\n" +
+ "package.FooClass,package.BarClass"),
+
HIVE_VECTORIZATION_PTF_MAX_MEMORY_BUFFERING_BATCH_COUNT("hive.vectorized.ptf.max.memory.buffering.batch.count",
25,
"Maximum number of vectorized row batches to buffer in memory for
PTF\n" +
"The default value is 25"),
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 289e3f5c480..e49cd5bf3f9 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -277,6 +277,18 @@ public static HiveVectorIfStmtMode
getHiveConfValue(HiveConf hiveConf) {
private HiveVectorIfStmtMode hiveVectorIfStmtMode;
+ private Set<String> allowedCustomUDFs;
+
+ // Parses hive.vectorized.adaptor.custom.udf.whitelist into the set of allowed UDF class names.
+ private Set<String> getAllowedCustomUDFs(HiveConf hiveConf) {
+ String udfs = HiveConf.getVar(hiveConf, HiveConf.ConfVars.HIVE_VECTOR_ADAPTOR_CUSTOM_UDF_WHITELIST);
+ if (udfs == null || udfs.trim().isEmpty()) {
+ return new HashSet<>();
+ }
+ // Trim and split on commas with optional surrounding whitespace, so "a.Foo, b.Bar" yields both names.
+ return new HashSet<>(Arrays.asList(udfs.trim().split("\\s*,\\s*")));
+ }
+
//when set to true use the overflow checked vector expressions
private boolean useCheckedVectorExpressions;
@@ -298,6 +310,7 @@ private void setHiveConfVars(HiveConf hiveConf) {
adaptorSuppressEvaluateExceptions =
HiveConf.getBoolVar(
hiveConf,
HiveConf.ConfVars.HIVE_VECTORIZED_ADAPTOR_SUPPRESS_EVALUATE_EXCEPTIONS);
+ this.allowedCustomUDFs = getAllowedCustomUDFs(hiveConf);
}
private void copyHiveConfVars(VectorizationContext vContextEnvironment) {
@@ -1037,7 +1050,7 @@ public VectorExpression getVectorExpression(ExprNodeDesc
exprDesc, VectorExpress
"Could not vectorize expression (mode = " + mode.name() + "):
" + exprDesc.toString()
+ " because hive.vectorized.adaptor.usage.mode=none");
case CHOSEN:
- if (isNonVectorizedPathUDF(expr, mode)) {
+ if (isNonVectorizedPathUDF(expr, mode, allowedCustomUDFs)) {
ve = getCustomUDFExpression(expr, mode);
} else {
throw new HiveException(
@@ -1446,8 +1459,8 @@ public static GenericUDF getGenericUDFForCast(TypeInfo
castType) throws HiveExce
* Depending on performance requirements and frequency of use, these
* may be implemented in the future with an optimized VectorExpression.
*/
- public static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
- VectorExpressionDescriptor.Mode mode) {
+ private static boolean isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
+ VectorExpressionDescriptor.Mode mode, Set<String> allowCustomUDFs) {
GenericUDF gudf = expr.getGenericUDF();
if (gudf instanceof GenericUDFBridge) {
GenericUDFBridge bridge = (GenericUDFBridge) gudf;
@@ -1486,6 +1499,8 @@ public static boolean
isNonVectorizedPathUDF(ExprNodeGenericFuncDesc expr,
return true;
} else if (gudf instanceof GenericUDFConcat && (mode ==
VectorExpressionDescriptor.Mode.PROJECTION)) {
return true;
+ } else if (allowCustomUDFs.contains(gudf.getClass().getName())) {
+ return true;
}
return false;
}