This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1dd430be7f6 [fix](variant) Reject COUNT DISTINCT on variant arguments (#63479)
1dd430be7f6 is described below
commit 1dd430be7f620d21a83c16c1e5ed7bbba2d1fcfe
Author: lihangyu <[email protected]>
AuthorDate: Tue Jun 9 16:34:31 2026 +0800
[fix](variant) Reject COUNT DISTINCT on variant arguments (#63479)
Problem Summary: `COUNT(DISTINCT variant_subcolumn)` could reach BE hash
key selection and fail with a vague `INTERNAL_ERROR` when the argument
was `VARIANT`. This PR rejects `VARIANT` arguments during FE aggregate
analysis. The rejection also covers grouped distinct aggregate plans, and
it takes effect before `DistinctAggregateRewriter` can split the distinct
aggregate.
`COUNT(DISTINCT ...)` on `VARIANT` arguments now reports a clear
unsupported-type error instead of a BE internal error. To use
`COUNT DISTINCT` on such data, first cast the `VARIANT` expression to
`STRING` or another supported scalar type.
- Test: Unit Test / Build / Regression Test
- `./run-fe-ut.sh --run org.apache.doris.nereids.trees.expressions.functions.agg.CountTest`
- `./build.sh --fe`
- `./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d correctness_p0 -s test_array_order_by`
- `./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d correctness_p0 -s test_array_string_order_by`
- `./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d variant_p0 -s test_variant_count_distinct`
- `git diff --check`
- Behavior changed: Yes. `COUNT DISTINCT` on `VARIANT` now fails during
analysis with a clearer error instead of a BE `INTERNAL_ERROR`.
- Does this need documentation: No
---
.../trees/expressions/functions/agg/Count.java | 30 +++++++-
.../functions/agg/MultiDistinctCount.java | 7 ++
.../trees/expressions/functions/agg/CountTest.java | 84 ++++++++++++++++++++++
.../variant_p0/test_variant_count_distinct.groovy | 41 +++++++++++
4 files changed, 159 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
index 8f486bfc2ef..d203ea1a92d 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
@@ -92,13 +92,37 @@ public class Count extends NotNullableAggregateFunction
public void checkLegalityAfterRewrite() {
// after rewrite, count(distinct bitmap_column) should be rewritten to
bitmap_union_count(bitmap_column)
for (Expression argument : getArguments()) {
- if (distinct && (argument.getDataType().isComplexType()
- || argument.getDataType().isObjectType() ||
argument.getDataType().isJsonType())) {
- throw new AnalysisException("COUNT DISTINCT could not process
type " + this.toSql());
+ if (distinct) {
+ checkDistinctArgument(argument, this);
}
}
}
+ static void checkDistinctArgument(Expression argument, Expression
function) {
+ DataType argumentType = argument.getDataType();
+ if (isUnsupportedDistinctArgument(argumentType)) {
+ throwDistinctArgumentException(function);
+ }
+ }
+
+ static void checkDistinctVariantArgument(Expression argument, Expression
function) {
+ DataType argumentType = argument.getDataType();
+ if (argumentType.isVariantType()) {
+ throwDistinctArgumentException(function);
+ }
+ }
+
+ private static boolean isUnsupportedDistinctArgument(DataType
argumentType) {
+ return argumentType.isComplexType()
+ || argumentType.isObjectType()
+ || argumentType.isJsonType()
+ || argumentType.isVariantType();
+ }
+
+ private static void throwDistinctArgumentException(Expression function) {
+ throw new AnalysisException("COUNT DISTINCT could not process type " +
function.toSql());
+ }
+
public boolean isStar() {
return isStar;
}
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
index 9e71a3eb647..a48b72e94d4 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
@@ -70,6 +70,13 @@ public class MultiDistinctCount extends
NotNullableAggregateFunction
return new MultiDistinctCount(getFunctionParams(false, children));
}
+ @Override
+ public void checkLegalityAfterRewrite() {
+ for (Expression argument : getArguments()) {
+ Count.checkDistinctVariantArgument(argument, new Count(true,
argument));
+ }
+ }
+
@Override
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
return visitor.visitMultiDistinctCount(this, context);
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
new file mode 100644
index 00000000000..537b45062d6
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.agg;
+
+import org.apache.doris.common.Pair;
+import org.apache.doris.nereids.analyzer.UnboundSlot;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import
org.apache.doris.nereids.rules.exploration.mv.rollup.SingleCombinatorRollupHandler;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import
org.apache.doris.nereids.trees.expressions.functions.combinator.StateCombinator;
+import
org.apache.doris.nereids.trees.expressions.functions.combinator.UnionCombinator;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.VariantType;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class CountTest {
+ @Test
+ void testCountDistinctRejectsVariant() {
+ Count count = new Count(true, SlotReference.of("v",
VariantType.INSTANCE));
+
+ AnalysisException exception =
Assertions.assertThrows(AnalysisException.class,
+ count::checkLegalityAfterRewrite);
+ Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT
could not process type"));
+ Assertions.assertTrue(exception.getMessage().contains("count(DISTINCT
v)"));
+ }
+
+ @Test
+ void testMultiDistinctCountRejectsVariant() {
+ MultiDistinctCount count = new
MultiDistinctCount(SlotReference.of("v", VariantType.INSTANCE));
+
+ AnalysisException exception =
Assertions.assertThrows(AnalysisException.class,
+ count::checkLegalityAfterRewrite);
+ Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT
could not process type"));
+ Assertions.assertTrue(exception.getMessage().contains("count(DISTINCT
v)"));
+ }
+
+ @Test
+ void testMultiDistinctCountAllowsArray() {
+ MultiDistinctCount count = new MultiDistinctCount(
+ SlotReference.of("arr", ArrayType.of(IntegerType.INSTANCE)));
+
+ Assertions.assertDoesNotThrow(count::checkLegalityAfterRewrite);
+ }
+
+ @Test
+ void testMultiDistinctCountAllowsUnboundArgumentBeforeLegalityCheck() {
+ Assertions.assertDoesNotThrow(() -> new MultiDistinctCount(new
UnboundSlot("kint")));
+ }
+
+ @Test
+ void testMultiDistinctCountCanRollupFromUnionState() {
+ SlotReference kint = SlotReference.of("kint", IntegerType.INSTANCE);
+ MultiDistinctCount queryFunction = new MultiDistinctCount(kint);
+ StateCombinator stateCombinator =
StateCombinator.create(queryFunction);
+ UnionCombinator unionCombinator = new
UnionCombinator(ImmutableList.of(stateCombinator), queryFunction);
+ SlotReference mvSlot = SlotReference.of("__multi_distinct_count_1",
unionCombinator.getDataType());
+ Pair<Expression, Expression> mvExprToMvScanExprPair =
Pair.of(unionCombinator, mvSlot);
+
+ Assertions.assertTrue(SingleCombinatorRollupHandler.INSTANCE.canRollup(
+ queryFunction, queryFunction, mvExprToMvScanExprPair,
+ ImmutableMap.of(unionCombinator, mvSlot)));
+ }
+}
diff --git
a/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
new file mode 100644
index 00000000000..b82447bd1fe
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_count_distinct") {
+ sql "DROP TABLE IF EXISTS test_variant_count_distinct_array_subcolumn"
+
+ sql """
+ CREATE TABLE test_variant_count_distinct_array_subcolumn (
+ id INT,
+ v VARIANT
+ ) DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1")
+ """
+
+ sql """
+ INSERT INTO test_variant_count_distinct_array_subcolumn VALUES
+ (1, '{"arr":[1,2,3]}'),
+ (2, '{"arr":[4,5]}'),
+ (3, '{"arr":[1,2,3]}')
+ """
+
+ test {
+ sql "SELECT COUNT(DISTINCT v['arr']) FROM
test_variant_count_distinct_array_subcolumn"
+ exception "COUNT DISTINCT could not process type"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]