924060929 commented on code in PR #64458:
URL: https://github.com/apache/doris/pull/64458#discussion_r3402338162
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java:
##########
@@ -300,37 +300,51 @@ public LogicalPlan normalizeAgg(LogicalAggregate<Plan>
aggregate, Optional<Logic
}
if (!missingSlotsInAggregate.isEmpty()) {
if (SqlModeHelper.hasOnlyFullGroupBy()) {
- throw new AnalysisException(String.format("PROJECT
expression %s must appear in the GROUP BY"
- + " clause or be used in an aggregate function",
- missingSlotsInAggregate.stream()
- .map(slot -> "'" + slot.getName() + "'")
- .collect(Collectors.joining(", "))));
- } else {
- // for any slots missing in aggregate's output, we should
add a any_value(slot) into
- // aggregate's output list and slot itself into bottom
project's output list
- bottomProjects = Sets.union(bottomProjects,
missingSlotsInAggregate);
- Map<Expression, Expression> replaceMap = Maps.newHashMap();
- Map<String, Alias> normalizedAggExistingAlias =
Maps.newHashMap();
- for (NamedExpression output :
normalizedAggOutputBuilder.build()) {
- if (output instanceof Alias) {
- normalizedAggExistingAlias.put(output.getName(),
(Alias) output);
- }
- }
+ // Under only_full_group_by, a non-aggregated output
expression is allowed only when it
+ // is constant for every input row (a uniform slot). This
matches MySQL, which accepts
+ // such expressions by functional dependency, e.g.
Review Comment:
Fixed in a8e7db00dc2: switched the predicate to `isUniformAndNotNull`.
`addUniformSlotForOuterJoinNullableSide` only adds entries that are
nullable-without-const-value or a NULL literal, so requiring not-null excludes
the outer-join case while still allowing genuine non-null constants like `1 AS
a`. (`isUniformAndHasConstValue` would be too strict here, since
`LogicalOneRowRelation.computeUniform` records uniform slots without a const
value.) Added unit test `outerJoinNullableUniformRejected` using your repro.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/NormalizeAggregate.java:
##########
@@ -300,37 +300,51 @@ public LogicalPlan normalizeAgg(LogicalAggregate<Plan>
aggregate, Optional<Logic
}
if (!missingSlotsInAggregate.isEmpty()) {
if (SqlModeHelper.hasOnlyFullGroupBy()) {
- throw new AnalysisException(String.format("PROJECT
expression %s must appear in the GROUP BY"
- + " clause or be used in an aggregate function",
- missingSlotsInAggregate.stream()
- .map(slot -> "'" + slot.getName() + "'")
- .collect(Collectors.joining(", "))));
- } else {
- // for any slots missing in aggregate's output, we should
add a any_value(slot) into
- // aggregate's output list and slot itself into bottom
project's output list
- bottomProjects = Sets.union(bottomProjects,
missingSlotsInAggregate);
- Map<Expression, Expression> replaceMap = Maps.newHashMap();
- Map<String, Alias> normalizedAggExistingAlias =
Maps.newHashMap();
- for (NamedExpression output :
normalizedAggOutputBuilder.build()) {
- if (output instanceof Alias) {
- normalizedAggExistingAlias.put(output.getName(),
(Alias) output);
- }
- }
+ // Under only_full_group_by, a non-aggregated output
expression is allowed only when it
+ // is constant for every input row (a uniform slot). This
matches MySQL, which accepts
+ // such expressions by functional dependency, e.g.
+ // SELECT a AS b, b AS c FROM (SELECT 1 AS a, 2 AS b) t
GROUP BY b, c
+ // here 'a' is a constant column of the derived table.
Non-constant missing slots are
+ // still rejected. The constant ones fall through to the
any_value() wrapping below
+ // (any_value of a constant is that constant), so the
result stays unambiguous.
+ DataTrait childTrait =
aggregate.child().getLogicalProperties().getTrait();
+ List<String> invalidMissingSlots = new ArrayList<>();
for (Slot slot : missingSlotsInAggregate) {
- AnyValue anyValue = new AnyValue(false,
normalizedGroupExprs.isEmpty(), slot);
- Alias exisitingAlias =
normalizedAggExistingAlias.get(slot.getName());
- if (exisitingAlias != null &&
anyValue.equals(exisitingAlias.child())) {
- replaceMap.put(slot, exisitingAlias.toSlot());
- } else {
- Alias anyValueAlias = new Alias(anyValue,
slot.getName());
- replaceMap.put(slot, anyValueAlias.toSlot());
- normalizedAggOutputBuilder.add(anyValueAlias);
+ if (!childTrait.isUniform(slot)) {
+ invalidMissingSlots.add("'" + slot.getName() +
"'");
}
}
- upperProjects = upperProjects.stream()
- .map(e -> (NamedExpression)
ExpressionUtils.replace(e, replaceMap))
- .collect(ImmutableList.toImmutableList());
+ if (!invalidMissingSlots.isEmpty()) {
+ throw new AnalysisException(String.format("PROJECT
expression %s must appear in the GROUP BY"
+ + " clause or be used in an aggregate
function",
+ String.join(", ", invalidMissingSlots)));
+ }
+ }
+ // For any slots missing in aggregate's output, we add an
any_value(slot) into aggregate's
+ // output list and the slot itself into the bottom project's
output list. When
+ // only_full_group_by is enabled, all remaining missing slots
are constant (checked above).
+ bottomProjects = Sets.union(bottomProjects,
missingSlotsInAggregate);
Review Comment:
Fixed in a8e7db00dc2, in two parts:
1. Pass `newAggregate.getOutputExpressions()` (the current output, including
the any_value aliases) to `eliminateGroupByConstant` instead of the stale
`normalizedAggOutput` snapshot.
2. That alone was not enough. I probed the rewritten plans: a later pass
collapses the aggregate after constant-key elimination and does not preserve
the any_value aliases. This happens for any constant group-by key (e.g. `GROUP
BY k2, 'g'`), not only the all-constant case. So the relaxation now skips cases
with a constant group-by key and keeps the original rejection there — same
behavior as before this PR, and no dangling plan. Added unit test
`constantGroupKeyStillRejected`.
##########
regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy:
##########
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_group_by_constant_output") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+
+ // The output alias 'b' collides with the derived-table column 'b', so
under only_full_group_by the
+ // column 'b' is grouped and the constant column 'a' is left ungrouped.
Because 'a' is constant for
+ // every input row it is accepted (MySQL functional-dependency behavior),
and the query returns (1, 2).
+ def r1 = sql "SELECT a as b, b as c FROM (SELECT 1 as a, 2 as b) t1 GROUP
BY b, c"
+ assertEquals(1, r1.size())
Review Comment:
Fixed in a8e7db00dc2: converted the positive result cases to `qt_*` with a
generated `.out` file.
##########
regression-test/suites/nereids_syntax_p0/test_group_by_constant_output.groovy:
##########
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_group_by_constant_output") {
+ sql "SET enable_nereids_planner=true"
+ sql "SET enable_fallback_to_original_planner=false"
+
+ // The output alias 'b' collides with the derived-table column 'b', so
under only_full_group_by the
+ // column 'b' is grouped and the constant column 'a' is left ungrouped.
Because 'a' is constant for
+ // every input row it is accepted (MySQL functional-dependency behavior),
and the query returns (1, 2).
+ def r1 = sql "SELECT a as b, b as c FROM (SELECT 1 as a, 2 as b) t1 GROUP
BY b, c"
+ assertEquals(1, r1.size())
+ assertEquals(1, (r1[0][0] as int))
+ assertEquals(2, (r1[0][1] as int))
+
+ // A constant column not present in GROUP BY is allowed even though it is
neither grouped nor aggregated.
+ def r2 = sql "SELECT a, b FROM (SELECT 1 as a, 2 as b) t1 GROUP BY a"
+ assertEquals(1, r2.size())
+ assertEquals(1, (r2[0][0] as int))
+ assertEquals(2, (r2[0][1] as int))
+
+ // The same shape over a real table leaves a non-constant column
ungrouped, which is still rejected
+ // (matching MySQL, where only constant / functionally-dependent columns
are allowed).
+ sql "DROP TABLE IF EXISTS test_gb_const_t"
+ sql """
+ CREATE TABLE test_gb_const_t (a INT, b INT) ENGINE=OLAP
+ DUPLICATE KEY(a)
+ DISTRIBUTED BY HASH(a) BUCKETS 1
+ PROPERTIES ('replication_num' = '1')
+ """
+ test {
+ sql "SELECT a as b, b as c FROM test_gb_const_t GROUP BY b, c"
+ exception "must appear in the GROUP BY"
+ }
+
+ sql "DROP TABLE IF EXISTS test_gb_const_t"
Review Comment:
Fixed in a8e7db00dc2: removed the final DROP and kept only the
DROP-before-create.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]