This is an automated email from the ASF dual-hosted git repository.

eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1dd430be7f6 [fix](variant) Reject COUNT DISTINCT on variant arguments (#63479)
1dd430be7f6 is described below

commit 1dd430be7f620d21a83c16c1e5ed7bbba2d1fcfe
Author: lihangyu <[email protected]>
AuthorDate: Tue Jun 9 16:34:31 2026 +0800

    [fix](variant) Reject COUNT DISTINCT on variant arguments (#63479)
    
    Problem Summary: `COUNT(DISTINCT variant_subcolumn)` could reach BE hash
    key selection and fail with a vague `INTERNAL_ERROR` when the argument
    was `VARIANT`. This PR rejects `VARIANT` arguments during FE aggregate
    analysis, including grouped distinct aggregate plans before
    `DistinctAggregateRewriter` can split the distinct aggregate.
    
    `COUNT(DISTINCT ...)` on `VARIANT` arguments now reports a clear
    unsupported-type error instead of a BE internal error. Cast `VARIANT`
    expressions to `STRING` or another supported scalar type before using
    `COUNT DISTINCT`.
    
    - Test: Unit Test / Build / Regression Test
        - `./run-fe-ut.sh --run org.apache.doris.nereids.trees.expressions.functions.agg.CountTest`
        - `./build.sh --fe`
        - `./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d correctness_p0 -s test_array_order_by`
        - `./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d correctness_p0 -s test_array_string_order_by`
        - `./run-regression-test.sh --run --conf tmp/regression-conf.auto.groovy -d variant_p0 -s test_variant_count_distinct`
        - `git diff --check`
    - Behavior changed: Yes. `COUNT DISTINCT` on `VARIANT` now fails during
    analysis with a clearer error instead of a BE `INTERNAL_ERROR`.
    - Does this need documentation: No
---
 .../trees/expressions/functions/agg/Count.java     | 30 +++++++-
 .../functions/agg/MultiDistinctCount.java          |  7 ++
 .../trees/expressions/functions/agg/CountTest.java | 84 ++++++++++++++++++++++
 .../variant_p0/test_variant_count_distinct.groovy  | 41 +++++++++++
 4 files changed, 159 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
index 8f486bfc2ef..d203ea1a92d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/Count.java
@@ -92,13 +92,37 @@ public class Count extends NotNullableAggregateFunction
     public void checkLegalityAfterRewrite() {
         // after rewrite, count(distinct bitmap_column) should be rewritten to 
bitmap_union_count(bitmap_column)
         for (Expression argument : getArguments()) {
-            if (distinct && (argument.getDataType().isComplexType()
-                    || argument.getDataType().isObjectType() || 
argument.getDataType().isJsonType())) {
-                throw new AnalysisException("COUNT DISTINCT could not process 
type " + this.toSql());
+            if (distinct) {
+                checkDistinctArgument(argument, this);
             }
         }
     }
 
+    static void checkDistinctArgument(Expression argument, Expression 
function) {
+        DataType argumentType = argument.getDataType();
+        if (isUnsupportedDistinctArgument(argumentType)) {
+            throwDistinctArgumentException(function);
+        }
+    }
+
+    static void checkDistinctVariantArgument(Expression argument, Expression 
function) {
+        DataType argumentType = argument.getDataType();
+        if (argumentType.isVariantType()) {
+            throwDistinctArgumentException(function);
+        }
+    }
+
+    private static boolean isUnsupportedDistinctArgument(DataType 
argumentType) {
+        return argumentType.isComplexType()
+                || argumentType.isObjectType()
+                || argumentType.isJsonType()
+                || argumentType.isVariantType();
+    }
+
+    private static void throwDistinctArgumentException(Expression function) {
+        throw new AnalysisException("COUNT DISTINCT could not process type " + 
function.toSql());
+    }
+
     public boolean isStar() {
         return isStar;
     }
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
index 9e71a3eb647..a48b72e94d4 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/agg/MultiDistinctCount.java
@@ -70,6 +70,13 @@ public class MultiDistinctCount extends 
NotNullableAggregateFunction
         return new MultiDistinctCount(getFunctionParams(false, children));
     }
 
+    @Override
+    public void checkLegalityAfterRewrite() {
+        for (Expression argument : getArguments()) {
+            Count.checkDistinctVariantArgument(argument, new Count(true, 
argument));
+        }
+    }
+
     @Override
     public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
         return visitor.visitMultiDistinctCount(this, context);
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
new file mode 100644
index 00000000000..537b45062d6
--- /dev/null
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/agg/CountTest.java
@@ -0,0 +1,84 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.agg;
+
+import org.apache.doris.common.Pair;
+import org.apache.doris.nereids.analyzer.UnboundSlot;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import 
org.apache.doris.nereids.rules.exploration.mv.rollup.SingleCombinatorRollupHandler;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.SlotReference;
+import 
org.apache.doris.nereids.trees.expressions.functions.combinator.StateCombinator;
+import 
org.apache.doris.nereids.trees.expressions.functions.combinator.UnionCombinator;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.VariantType;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+class CountTest {
+    @Test
+    void testCountDistinctRejectsVariant() {
+        Count count = new Count(true, SlotReference.of("v", 
VariantType.INSTANCE));
+
+        AnalysisException exception = 
Assertions.assertThrows(AnalysisException.class,
+                count::checkLegalityAfterRewrite);
+        Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT 
could not process type"));
+        Assertions.assertTrue(exception.getMessage().contains("count(DISTINCT 
v)"));
+    }
+
+    @Test
+    void testMultiDistinctCountRejectsVariant() {
+        MultiDistinctCount count = new 
MultiDistinctCount(SlotReference.of("v", VariantType.INSTANCE));
+
+        AnalysisException exception = 
Assertions.assertThrows(AnalysisException.class,
+                count::checkLegalityAfterRewrite);
+        Assertions.assertTrue(exception.getMessage().contains("COUNT DISTINCT 
could not process type"));
+        Assertions.assertTrue(exception.getMessage().contains("count(DISTINCT 
v)"));
+    }
+
+    @Test
+    void testMultiDistinctCountAllowsArray() {
+        MultiDistinctCount count = new MultiDistinctCount(
+                SlotReference.of("arr", ArrayType.of(IntegerType.INSTANCE)));
+
+        Assertions.assertDoesNotThrow(count::checkLegalityAfterRewrite);
+    }
+
+    @Test
+    void testMultiDistinctCountAllowsUnboundArgumentBeforeLegalityCheck() {
+        Assertions.assertDoesNotThrow(() -> new MultiDistinctCount(new 
UnboundSlot("kint")));
+    }
+
+    @Test
+    void testMultiDistinctCountCanRollupFromUnionState() {
+        SlotReference kint = SlotReference.of("kint", IntegerType.INSTANCE);
+        MultiDistinctCount queryFunction = new MultiDistinctCount(kint);
+        StateCombinator stateCombinator = 
StateCombinator.create(queryFunction);
+        UnionCombinator unionCombinator = new 
UnionCombinator(ImmutableList.of(stateCombinator), queryFunction);
+        SlotReference mvSlot = SlotReference.of("__multi_distinct_count_1", 
unionCombinator.getDataType());
+        Pair<Expression, Expression> mvExprToMvScanExprPair = 
Pair.of(unionCombinator, mvSlot);
+
+        Assertions.assertTrue(SingleCombinatorRollupHandler.INSTANCE.canRollup(
+                queryFunction, queryFunction, mvExprToMvScanExprPair,
+                ImmutableMap.of(unionCombinator, mvSlot)));
+    }
+}
diff --git 
a/regression-test/suites/variant_p0/test_variant_count_distinct.groovy 
b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
new file mode 100644
index 00000000000..b82447bd1fe
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_count_distinct.groovy
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_variant_count_distinct") {
+    sql "DROP TABLE IF EXISTS test_variant_count_distinct_array_subcolumn"
+
+    sql """
+        CREATE TABLE test_variant_count_distinct_array_subcolumn (
+            id INT,
+            v VARIANT
+        ) DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1")
+    """
+
+    sql """
+        INSERT INTO test_variant_count_distinct_array_subcolumn VALUES
+        (1, '{"arr":[1,2,3]}'),
+        (2, '{"arr":[4,5]}'),
+        (3, '{"arr":[1,2,3]}')
+    """
+
+    test {
+        sql "SELECT COUNT(DISTINCT v['arr']) FROM 
test_variant_count_distinct_array_subcolumn"
+        exception "COUNT DISTINCT could not process type"
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to