This is an automated email from the ASF dual-hosted git repository.
zclllyybb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new aa68e4bd9e7 [Enhancement](udf) Reject bitmap, hll, and quantile_state
in udf create (#63849)
aa68e4bd9e7 is described below
commit aa68e4bd9e74dc84eaeb3860058ba818e2081abb
Author: linrrarity <[email protected]>
AuthorDate: Fri May 29 11:34:39 2026 +0800
[Enhancement](udf) Reject bitmap, hll, and quantile_state in udf create
(#63849)
Problem Summary:
UDF creation currently allows `BITMAP`, `HLL`, and `QUANTILE_STATE` in
function signatures, but these object types are not exposed to
Java/Python UDF runtimes as first-class values. They are effectively
bridged as opaque bytes and are marked as unsupported in the
[documentation](https://doris.apache.org/docs/dev/query-data/udf/python-user-defined-function#data-type-mapping).
This change makes function creation reject these types for `JAVA_UDF` and
`PYTHON_UDF` functions when they are used as an argument, return, or
intermediate type.
---
.../plans/commands/CreateFunctionCommand.java | 33 ++++
.../apache/doris/catalog/CreateFunctionTest.java | 35 ++++
.../test_pythonudaf_object_types_inline.groovy | 184 +++++++++++++++++++++
.../test_pythonudf_object_types_inline.groovy | 105 ++++++++++++
.../test_pythonudtf_object_types_inline.groovy | 105 ++++++++++++
5 files changed, 462 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
index bc5edcbb59b..4a367bb8079 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateFunctionCommand.java
@@ -37,6 +37,7 @@ import org.apache.doris.catalog.MapType;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.catalog.ScalarFunction;
import org.apache.doris.catalog.ScalarType;
+import org.apache.doris.catalog.StructField;
import org.apache.doris.catalog.StructType;
import org.apache.doris.catalog.Type;
import org.apache.doris.common.AnalysisException;
@@ -346,6 +347,7 @@ public class CreateFunctionCommand extends Command
implements ForwardWithSync {
}
if (binaryType == Function.BinaryType.JAVA_UDF) {
FunctionUtil.checkEnableJavaUdf();
+ checkUdfSupportedTypes();
if (!isAggregate && !isTableFunction) {
volatility = analyzeVolatility();
}
@@ -363,6 +365,7 @@ public class CreateFunctionCommand extends Command
implements ForwardWithSync {
extractExpirationTime();
} else if (binaryType == Function.BinaryType.PYTHON_UDF) {
FunctionUtil.checkEnablePythonUdf();
+ checkUdfSupportedTypes();
if (!isAggregate && !isTableFunction) {
volatility = analyzeVolatility();
}
@@ -418,6 +421,36 @@ public class CreateFunctionCommand extends Command
implements ForwardWithSync {
return runtimeVersionString != null &&
PYTHON_VERSION_PATTERN.matcher(runtimeVersionString).matches();
}
+ private void checkUdfSupportedTypes() throws AnalysisException {
+ Type[] argTypes = argsDef.getArgTypes();
+ for (int i = 0; i < argTypes.length; i++) {
+ checkUdfSupportedType(argTypes[i], "argument " + (i + 1));
+ }
+ checkUdfSupportedType(returnType.toCatalogDataType(), "return");
+ if (intermediateType != null) {
+ checkUdfSupportedType(intermediateType.toCatalogDataType(),
"intermediate");
+ }
+ }
+
+ private void checkUdfSupportedType(Type type, String typePosition) throws
AnalysisException {
+ // Reject bitmap/hll/quantile_state type
+ if (type.isObjectStored()) {
+ throw new AnalysisException(String.format(
+ "%s does not support %s type %s", binaryType,
typePosition, type.toSql()));
+ }
+
+ if (type.isArrayType()) {
+ checkUdfSupportedType(((ArrayType) type).getItemType(),
typePosition + " element");
+ } else if (type.isMapType()) {
+ checkUdfSupportedType(((MapType) type).getKeyType(), typePosition
+ " key");
+ checkUdfSupportedType(((MapType) type).getValueType(),
typePosition + " value");
+ } else if (type.isStructType()) {
+ for (StructField field : ((StructType) type).getFields()) {
+ checkUdfSupportedType(field.getType(), typePosition + " field
" + field.getName());
+ }
+ }
+ }
+
private Boolean parseBooleanFromProperties(String propertyString) throws
AnalysisException {
String valueOfString = properties.get(propertyString);
if (valueOfString == null) {
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
index 426a45074b8..e6741b9e54c 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/CreateFunctionTest.java
@@ -130,6 +130,35 @@ public class CreateFunctionTest {
Assert.assertEquals(FunctionVolatility.VOLATILE, findFunction(db,
"py_default").getVolatility());
}
+ @Test
+ public void testCreatePythonFunctionRejectsObjectTypes() throws Exception {
+ ConnectContext ctx = UtFrameUtils.createDefaultCtx();
+ createDatabase(ctx, "create database py_obj_type_db;");
+ dorisAssert = new DorisAssert(ctx);
+ dorisAssert.useDatabase("py_obj_type_db");
+
+ assertCreateFunctionAnalysisException(ctx, "create function
py_obj_type_db.py_bitmap_arg(bitmap) returns int "
+ + "properties('type'='PYTHON_UDF', 'symbol'='evaluate',
'runtime_version'='3.10.2');",
+ "PYTHON_UDF does not support argument 1 type bitmap");
+ assertCreateFunctionAnalysisException(ctx, "create function
py_obj_type_db.j_bitmap_arg(bitmap) returns int "
+ + "properties('type'='JAVA_UDF', 'symbol'='evaluate');",
+ "JAVA_UDF does not support argument 1 type bitmap");
+ assertCreateFunctionAnalysisException(ctx, "create function
py_obj_type_db.py_hll_ret(int) returns hll "
+ + "properties('type'='PYTHON_UDF', 'symbol'='evaluate',
'runtime_version'='3.10.2');",
+ "PYTHON_UDF does not support return type hll");
+ assertCreateFunctionAnalysisException(ctx, "create aggregate function
py_obj_type_db.py_quantile_arg"
+ + "(quantile_state) returns int
properties('type'='PYTHON_UDF', 'symbol'='Agg', "
+ + "'runtime_version'='3.10.2');",
+ "PYTHON_UDF does not support argument 1 type quantile_state");
+ assertCreateFunctionAnalysisException(ctx, "create aggregate function
py_obj_type_db.j_quantile_arg"
+ + "(quantile_state) returns int properties('type'='JAVA_UDF',
'symbol'='Agg');",
+ "JAVA_UDF does not support argument 1 type quantile_state");
+ assertCreateFunctionAnalysisException(ctx, "create tables function
py_obj_type_db.py_bitmap_table(int) "
+ + "returns array<bitmap> properties('type'='PYTHON_UDF',
'symbol'='evaluate', "
+ + "'runtime_version'='3.10.2');",
+ "ARRAY unsupported sub-type: bitmap");
+ }
+
@Test
public void testCreateGlobalFunction() throws Exception {
ConnectContext ctx = UtFrameUtils.createDefaultCtx();
@@ -215,6 +244,12 @@ public class CreateFunctionTest {
}
}
+ private void assertCreateFunctionAnalysisException(ConnectContext ctx,
String sql, String message) {
+ Exception exception = Assert.assertThrows(Exception.class, () ->
createFunction(sql, ctx));
+ Assert.assertTrue("Expected error to contain: " + message + ", actual:
" + exception.getMessage(),
+ exception.getMessage().contains(message));
+ }
+
private boolean containsIgnoreCase(String str, String sub) {
return str.toLowerCase().contains(sub.toLowerCase());
}
diff --git
a/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy
b/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy
new file mode 100644
index 00000000000..d37f5d33074
--- /dev/null
+++
b/regression-test/suites/pythonudaf_p0/test_pythonudaf_object_types_inline.groovy
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudaf_object_types_inline") {
+ def runtime_version = getPythonUdfRuntimeVersion()
+
+ test {
+ sql """
+ CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_arg(bitmap)
+ RETURNS BIGINT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "Agg",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+class Agg:
+ def __init__(self):
+ self.sum = 0
+ def accumulate(self, v):
+ pass
+ def merge(self, other):
+ pass
+ def finish(self):
+ return self.sum
+ @property
+ def aggregate_state(self):
+ return self.sum
+\$\$;
+ """
+ exception "does not support argument 1 type bitmap"
+ }
+
+ test {
+ sql """
+ CREATE AGGREGATE FUNCTION py_obj_udaf_hll_ret(int)
+ RETURNS HLL
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "Agg",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+class Agg:
+ def __init__(self):
+ self.state = None
+ def accumulate(self, v):
+ pass
+ def merge(self, other):
+ pass
+ def finish(self):
+ return self.state
+ @property
+ def aggregate_state(self):
+ return self.state
+\$\$;
+ """
+ exception "does not support return type hll"
+ }
+
+ test {
+ sql """
+ CREATE AGGREGATE FUNCTION py_obj_udaf_quantile_state(quantile_state)
+ RETURNS BIGINT
+ INTERMEDIATE BIGINT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "Agg",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+class Agg:
+ def __init__(self):
+ self.state = 0
+ def accumulate(self, v):
+ pass
+ def merge(self, other):
+ pass
+ def finish(self):
+ return self.state
+ @property
+ def aggregate_state(self):
+ return self.state
+\$\$;
+ """
+ exception "does not support argument 1 type quantile_state"
+ }
+
+ test {
+ sql """
+ CREATE AGGREGATE FUNCTION py_obj_udaf_bitmap_intermediate(int)
+ RETURNS BIGINT
+ INTERMEDIATE BITMAP
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "Agg",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+class Agg:
+ def __init__(self):
+ self.state = 0
+ def accumulate(self, v):
+ pass
+ def merge(self, other):
+ pass
+ def finish(self):
+ return self.state
+ @property
+ def aggregate_state(self):
+ return self.state
+\$\$;
+ """
+ exception "does not support intermediate type bitmap"
+ }
+
+ test {
+ sql """
+ CREATE AGGREGATE FUNCTION py_obj_udaf_array_bitmap(int)
+ RETURNS ARRAY<BITMAP>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "Agg",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+class Agg:
+ def __init__(self):
+ self.state = None
+ def accumulate(self, v):
+ pass
+ def merge(self, other):
+ pass
+ def finish(self):
+ return self.state
+ @property
+ def aggregate_state(self):
+ return self.state
+\$\$;
+ """
+ exception "ARRAY unsupported sub-type: bitmap"
+ }
+
+ test {
+ sql """
+ CREATE AGGREGATE FUNCTION py_obj_udaf_struct_bitmap(int)
+ RETURNS STRUCT<plain:INT, nested:MAP<INT, ARRAY<HLL>>>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "Agg",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+class Agg:
+ def __init__(self):
+ self.state = None
+ def accumulate(self, v):
+ pass
+ def merge(self, other):
+ pass
+ def finish(self):
+ return self.state
+ @property
+ def aggregate_state(self):
+ return self.state
+\$\$;
+ """
+ exception "ARRAY unsupported sub-type: hll"
+ }
+}
diff --git
a/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy
b/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy
new file mode 100644
index 00000000000..b141e6d503e
--- /dev/null
+++
b/regression-test/suites/pythonudf_p0/test_pythonudf_object_types_inline.groovy
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudf_object_types_inline") {
+ def runtime_version = getPythonUdfRuntimeVersion()
+
+ test {
+ sql """
+ CREATE FUNCTION py_obj_udf_bitmap_arg(bitmap)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ return 1
+\$\$;
+ """
+ exception "does not support argument 1 type bitmap"
+ }
+
+ test {
+ sql """
+ CREATE FUNCTION py_obj_udf_hll_ret(int)
+ RETURNS HLL
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ return None
+\$\$;
+ """
+ exception "does not support return type hll"
+ }
+
+ test {
+ sql """
+ CREATE FUNCTION py_obj_udf_array_bitmap(array<int>)
+ RETURNS ARRAY<BITMAP>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ return None
+\$\$;
+ """
+ exception "ARRAY unsupported sub-type: bitmap"
+ }
+
+ test {
+ sql """
+ CREATE FUNCTION py_obj_udf_map_bitmap(map<int, bitmap>)
+ RETURNS INT
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ return 1
+\$\$;
+ """
+ exception "MAP unsupported sub-type: bitmap"
+ }
+
+ test {
+ sql """
+ CREATE FUNCTION py_obj_udf_struct_bitmap(INT)
+ RETURNS STRUCT<plain:INT, nested:ARRAY<STRUCT<b:BITMAP>>>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ return None
+\$\$;
+ """
+ exception "STRUCT unsupported sub-type: bitmap"
+ }
+}
diff --git
a/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy
b/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy
new file mode 100644
index 00000000000..0d4259a6e6e
--- /dev/null
+++
b/regression-test/suites/pythonudtf_p0/test_pythonudtf_object_types_inline.groovy
@@ -0,0 +1,105 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_pythonudtf_object_types_inline") {
+ def runtime_version = getPythonUdfRuntimeVersion()
+
+ test {
+ sql """
+ CREATE TABLES FUNCTION py_obj_udtf_bitmap_arg(bitmap)
+ RETURNS ARRAY<STRUCT<value:INT>>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ yield (1,)
+\$\$;
+ """
+ exception "does not support argument 1 type bitmap"
+ }
+
+ test {
+ sql """
+ CREATE TABLES FUNCTION py_obj_udtf_hll_ret(int)
+ RETURNS ARRAY<HLL>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ yield (1,)
+\$\$;
+ """
+ exception "ARRAY unsupported sub-type: hll"
+ }
+
+ test {
+ sql """
+ CREATE TABLES FUNCTION py_obj_udtf_quantile_state(quantile_state)
+ RETURNS ARRAY<STRUCT<value:INT>>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ yield (1,)
+\$\$;
+ """
+ exception "does not support argument 1 type quantile_state"
+ }
+
+ test {
+ sql """
+ CREATE TABLES FUNCTION py_obj_udtf_array_bitmap(array<bitmap>)
+ RETURNS ARRAY<STRUCT<value:INT>>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ yield (1,)
+\$\$;
+ """
+ exception "ARRAY unsupported sub-type: bitmap"
+ }
+
+ test {
+ sql """
+ CREATE TABLES FUNCTION py_obj_udtf_struct_bitmap(int)
+ RETURNS ARRAY<STRUCT<plain:INT, nested:MAP<INT, ARRAY<BITMAP>>>>
+ PROPERTIES (
+ "type" = "PYTHON_UDF",
+ "symbol" = "evaluate",
+ "runtime_version" = "${runtime_version}"
+ )
+ AS \$\$
+def evaluate(v):
+ yield (1,)
+\$\$;
+ """
+ exception "ARRAY unsupported sub-type: bitmap"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]