This is an automated email from the ASF dual-hosted git repository.
924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 2915bdb6230 [fix](nereids) allow constant output column not in GROUP
BY under only_full_group_by (#64458)
2915bdb6230 is described below
commit 2915bdb62302da12e80a01a386a21ab074d09662
Author: 924060929 <[email protected]>
AuthorDate: Thu Jun 18 15:58:01 2026 +0800
[fix](nereids) allow constant output column not in GROUP BY under
only_full_group_by (#64458)
Problem Summary:
Under `only_full_group_by` (the default), Nereids rejected any
non-aggregated select
expression that was neither in `GROUP BY` nor wrapped in an aggregate
function, even when
that expression is **constant for every input row**. MySQL accepts such
expressions via
functional dependency. For example:
```sql
SELECT a AS b, b AS c FROM (SELECT 1 AS a, 2 AS b) t1 GROUP BY b, c;
```
The output alias `b` collides with the derived-table column `b`, so
column-first name
resolution groups by the column `b` and leaves the constant column `a`
ungrouped. This
previously failed with:
```
PROJECT expression 'a' must appear in the GROUP BY clause or be used in an
aggregate function
```
`NormalizeAggregate` now only rejects **non-constant** missing slots.
Constant (uniform)
slots flow through the existing `any_value()` wrapping (any_value of a
constant is that
constant), so the result stays unambiguous and the query returns `(1,
2)`, matching MySQL.
The same shape over a real table, where the ungrouped column is
non-constant, is still
rejected.
### Release note
Allow a constant (uniform) non-aggregated column that is not in `GROUP
BY` under
`only_full_group_by`, matching MySQL functional-dependency behavior.
---
.../nereids/rules/analysis/NormalizeAggregate.java | 58 +++++++++---
.../analysis/OnlyFullGroupByConstantTest.java | 100 +++++++++++++++++++++
.../test_group_by_constant_output.out | 10 +++
.../test_group_by_constant_output.groovy | 46 ++++++++++
4 files changed, 203 insertions(+), 11 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
index 0db8339239a..2306259dec2 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java
@@ -19,6 +19,7 @@ package org.apache.doris.nereids.rules.analysis;
import org.apache.doris.nereids.CascadesContext;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.properties.DataTrait;
import org.apache.doris.nereids.rules.Rule;
import org.apache.doris.nereids.rules.RuleType;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
@@ -66,7 +67,6 @@ import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
-import java.util.stream.Collectors;
/**
* normalize aggregate's group keys and AggregateFunction's child to
SlotReference
@@ -299,15 +299,47 @@ public class NormalizeAggregate implements
RewriteRuleFactory, NormalizeToSlot {
}
}
if (!missingSlotsInAggregate.isEmpty()) {
- if (SqlModeHelper.hasOnlyFullGroupBy()) {
- throw new AnalysisException(String.format("PROJECT
expression %s must appear in the GROUP BY"
- + " clause or be used in an aggregate function",
- missingSlotsInAggregate.stream()
- .map(slot -> "'" + slot.getName() + "'")
- .collect(Collectors.joining(", "))));
- } else {
- // for any slots missing in aggregate's output, we should
add a any_value(slot) into
- // aggregate's output list and slot itself into bottom
project's output list
+ // Under only_full_group_by, a non-grouped, non-aggregated
output column that is constant for
+ // every input row (uniform and not null) is valid (MySQL
functional dependency), e.g.
+ // SELECT a AS b, b AS c FROM (SELECT 1 AS a, 2 AS b) t
GROUP BY b, c
+ // where 'a' is a constant column of the derived table. We add
such a column to the group-by
+ // keys rather than wrapping it in any_value(): grouping by a
constant does not change the
+ // grouping, makes the column a valid output, and changes no
exprId, so references in ancestors
+ // (e.g. the result sink) stay valid -- any_value() would need
a new exprId that this rule
+ // cannot propagate upward. The redundant uniform key is later
removed by
+ // EliminateGroupByKeyByUniform. Only done when the aggregate
already has group-by keys; for a
+ // global aggregate adding a key would change empty-input
semantics. isUniformAndNotNull (not
+ // isUniform) excludes the nullable side of an outer join,
which holds the uniform value on
+ // matched rows but NULL on unmatched rows of the same group.
+ Set<Slot> constantMissingSlots = new HashSet<>();
+ if (SqlModeHelper.hasOnlyFullGroupBy() &&
!normalizedGroupExprs.isEmpty()) {
+ DataTrait childTrait =
aggregate.child().getLogicalProperties().getTrait();
+ for (Slot slot : missingSlotsInAggregate) {
+ if (childTrait.isUniformAndNotNull(slot)) {
+ constantMissingSlots.add(slot);
+ }
+ }
+ }
+ if (!constantMissingSlots.isEmpty()) {
+ bottomProjects = Sets.union(bottomProjects,
constantMissingSlots);
+ normalizedGroupExprs = ImmutableList.<Expression>builder()
+
.addAll(normalizedGroupExprs).addAll(constantMissingSlots).build();
+ for (Slot slot : constantMissingSlots) {
+ normalizedAggOutputBuilder.add(slot);
+ }
+ missingSlotsInAggregate.removeAll(constantMissingSlots);
+ }
+ if (!missingSlotsInAggregate.isEmpty()) {
+ if (SqlModeHelper.hasOnlyFullGroupBy()) {
+ List<String> invalidSlotNames = new ArrayList<>();
+ for (Slot slot : missingSlotsInAggregate) {
+ invalidSlotNames.add("'" + slot.getName() + "'");
+ }
+ throw new AnalysisException(String.format("PROJECT
expression %s must appear in the GROUP BY"
+ + " clause or be used in an aggregate
function", String.join(", ", invalidSlotNames)));
+ }
+ // only_full_group_by disabled: for the remaining missing
slots add an any_value(slot) into
+ // the aggregate's output and the slot itself into the
bottom project's output.
bottomProjects = Sets.union(bottomProjects,
missingSlotsInAggregate);
Map<Expression, Expression> replaceMap = Maps.newHashMap();
Map<String, Alias> normalizedAggExistingAlias =
Maps.newHashMap();
@@ -345,8 +377,12 @@ public class NormalizeAggregate implements
RewriteRuleFactory, NormalizeToSlot {
LogicalAggregate<?> newAggregate =
aggregate.withNormalized(normalizedGroupExprs,
normalizedAggOutputBuilder.build(), bottomPlan);
ExpressionRewriteContext rewriteContext = new
ExpressionRewriteContext(ctx);
+ // Use the current aggregate output (newAggregate already contains the
constant group-key slots added
+ // for missing slots above), not the stale normalizedAggOutput
snapshot, so constant group-key
+ // elimination keeps those outputs and does not leave dangling
references in upperProjects.
LogicalProject<Plan> project =
eliminateGroupByConstant(groupByExprContext, rewriteContext,
- normalizedGroupExprs, normalizedAggOutput, bottomProjects,
aggregate, upperProjects, newAggregate);
+ normalizedGroupExprs, newAggregate.getOutputExpressions(),
bottomProjects, aggregate,
+ upperProjects, newAggregate);
if (!having.isPresent()) {
return project;
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/OnlyFullGroupByConstantTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/OnlyFullGroupByConstantTest.java
new file mode 100644
index 00000000000..8639ee5d6f3
--- /dev/null
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/OnlyFullGroupByConstantTest.java
@@ -0,0 +1,100 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.rules.analysis;
+
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.util.PlanChecker;
+import org.apache.doris.utframe.TestWithFeService;
+
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Under only_full_group_by, a non-grouped, non-aggregated output column that
is constant for every input
+ * row (uniform and not null) is accepted (MySQL functional dependency):
NormalizeAggregate adds it to the
+ * group-by keys. Non-constant columns, and uniform-but-nullable columns from
the nullable side of an outer
+ * join, are still rejected. All cases are verified through rewrite() (not
only analyze()).
+ */
+public class OnlyFullGroupByConstantTest extends TestWithFeService {
+ @Override
+ protected void runBeforeAll() throws Exception {
+ createDatabase("test");
+ connectContext.setDatabase("test");
+ // columns 'a','b' deliberately collide with the output aliases used
below
+ createTable("CREATE TABLE test.t_ab (a INT, b INT) ENGINE=OLAP\n"
+ + "DUPLICATE KEY(a) DISTRIBUTED BY HASH(a) BUCKETS 1
PROPERTIES ('replication_num' = '1');");
+ createTable("CREATE TABLE test.t_v (k INT, v INT) ENGINE=OLAP\n"
+ + "DUPLICATE KEY(k) DISTRIBUTED BY HASH(k) BUCKETS 1
PROPERTIES ('replication_num' = '1');");
+ }
+
+ private void rewriteOk(String sql) {
+ Assertions.assertDoesNotThrow(() ->
PlanChecker.from(connectContext).analyze(sql).rewrite().getPlan(),
+ "should plan without error: " + sql);
+ }
+
+ private void rejected(String sql) {
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> PlanChecker.from(connectContext).analyze(sql).rewrite(),
"should be rejected: " + sql);
+ Assertions.assertTrue(ex.getMessage().contains("must appear in the
GROUP BY"),
+ "unexpected message: " + ex.getMessage());
+ }
+
+ @Test
+ public void aliasCollidesWithConstantColumn() {
+ // alias 'b' collides with the derived-table column 'b', so
column-first binding groups by column 'b'
+ // and leaves the constant column 'a' ungrouped. 'a' is constant ->
allowed.
+ rewriteOk("SELECT a as b, b as c FROM (SELECT 1 as a, 2 as b) t1 GROUP
BY b, c");
+ }
+
+ @Test
+ public void constantColumnNotInGroupBy() {
+ // 'b' is constant and not grouped (a bare output column) -> allowed
+ rewriteOk("SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY a");
+ }
+
+ @Test
+ public void constantGroupByLiteral() {
+ // group-by key is itself a constant and gets eliminated; constant
outputs a, b stay valid
+ rewriteOk("SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY 'g'");
+ }
+
+ @Test
+ public void nonConstantColumnStillRejected() {
+ // same shape over a real table: 'a' is non-constant and ungrouped ->
still rejected (MySQL parity)
+ rejected("SELECT a as b, b as c FROM test.t_ab GROUP BY b, c");
+ }
+
+ @Test
+ public void outerJoinNullableUniformRejected() {
+ // r.v is uniform (LIMIT 1) but propagated through the nullable side
of a LEFT JOIN, so it is the
+ // uniform value on matched rows and NULL on unmatched rows of the
same group -> must stay rejected.
+ rejected("SELECT r.v FROM (SELECT 1 AS g, 1 AS k UNION ALL SELECT 1 AS
g, 2 AS k) l "
+ + "LEFT JOIN (SELECT k, v FROM test.t_v LIMIT 1) r ON l.k =
r.k GROUP BY l.g");
+ }
+
+ @Test
+ public void mixedReportOnlyNonConstant() {
+ // group by b2; 'k' (constant 1) is allowed, 'a' (non-constant) must
be reported and 'k' must not.
+ AnalysisException ex = Assertions.assertThrows(AnalysisException.class,
+ () -> PlanChecker.from(connectContext).analyze(
+ "SELECT x.a, x.k, x.b2 FROM (SELECT a, 1 as k, b as b2
FROM test.t_ab) x GROUP BY x.b2")
+ .rewrite());
+ Assertions.assertTrue(ex.getMessage().contains("'a'"), "should report
'a': " + ex.getMessage());
+ Assertions.assertFalse(ex.getMessage().contains("'k'"), "should NOT
report constant 'k': " + ex.getMessage());
+ }
+}
diff --git
a/regression-test/data/nereids_syntax_p0/test_group_by_constant_output.out
b/regression-test/data/nereids_syntax_p0/test_group_by_constant_output.out
new file mode 100644
index 00000000000..37b8301f343
--- /dev/null
+++ b/regression-test/data/nereids_syntax_p0/test_group_by_constant_output.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !const_alias_collision --
+1 2
+
+-- !const_not_in_groupby --
+1 2
+
+-- !const_group_key --
+1 2
+
diff --git
a/regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy
b/regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy
new file mode 100644
index 00000000000..7156cd073b8
--- /dev/null
+++
b/regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy
@@ -0,0 +1,46 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_group_by_constant_output") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+
+ // The output alias 'b' collides with the derived-table column 'b', so
under only_full_group_by the
+ // column 'b' is grouped and the constant column 'a' is left ungrouped.
Because 'a' is constant for
+ // every input row it is accepted (MySQL functional-dependency behavior),
and the query returns (1, 2).
+ qt_const_alias_collision "SELECT a as b, b as c FROM (SELECT 1 as a, 2 as
b) t1 GROUP BY b, c"
+
+ // A constant column that is a bare output and not in GROUP BY is allowed.
+ qt_const_not_in_groupby "SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP
BY a"
+
+ // A constant group-by key is eliminated, and the constant outputs stay
valid.
+ qt_const_group_key "SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY
'g'"
+
+ // The same shape over a real table leaves a non-constant column
ungrouped, which is still rejected
+ // (matching MySQL, where only constant / functionally-dependent columns
are allowed).
+ sql "DROP TABLE IF EXISTS test_gb_const_t"
+ sql """
+ CREATE TABLE test_gb_const_t (a INT, b INT) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 1
+ PROPERTIES ('replication_num' = '1')
+ """
+ test {
+ sql "SELECT a as b, b as c FROM test_gb_const_t GROUP BY b, c"
+ exception "must appear in the GROUP BY"
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]