This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new 70e0e20b985f [SPARK-47895][SQL] group by alias should be idempotent 70e0e20b985f is described below commit 70e0e20b985f7d497541826ea69ebd5c8c8c5c39 Author: Mihailo Milosevic <mihailo.milose...@databricks.com> AuthorDate: Mon Mar 31 21:44:38 2025 +0800 [SPARK-47895][SQL] group by alias should be idempotent ### What changes were proposed in this pull request? This is a follow-up of https://github.com/apache/spark/pull/43797. GROUP BY ALIAS has the same bug, and this PR applies the same fix to GROUP BY ALIAS. ### Why are the changes needed? Advanced users or Spark plugins may manipulate the logical plans directly. We need to make the framework more reliable. ### Does this PR introduce _any_ user-facing change? Yes, we will fix the error. ### How was this patch tested? Test added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #50461 from mihailom-db/mihailom-db/fixGroupBy.
Authored-by: Mihailo Milosevic <mihailo.milose...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 02db87282dabca112b0d688560064f8b71e63d18) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../analysis/ResolveReferencesInAggregate.scala | 14 +++++++++++++- .../analysis/SubstituteUnresolvedOrdinalsSuite.scala | 18 ++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 7ea90854932e..d1aee4ca2a9d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -116,7 +116,19 @@ class ResolveReferencesInAggregate(val catalogManager: CatalogManager) extends S groupExprs.map { g => g.transformWithPruning(_.containsPattern(UNRESOLVED_ATTRIBUTE)) { case u: UnresolvedAttribute => - selectList.find(ne => conf.resolver(ne.name, u.name)).getOrElse(u) + val (result, index) = + selectList.zipWithIndex.find(ne => conf.resolver(ne._1.name, u.name)) + .getOrElse((u, -1)) + + trimAliases(result) match { + // HACK ALERT: If the expanded grouping expression is an integer literal, don't use it + // but use an integer literal of the index. The reason is we may + // repeatedly analyze the plan, and the original integer literal may cause + // failures with a later GROUP BY ordinal resolution. GROUP BY constant is + // meaningless so whatever value does not matter here. 
+ case IntegerLiteral(_) => Literal(index + 1) + case _ => result + } } } } else { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala index 39cf298aec43..106a607ba6df 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/SubstituteUnresolvedOrdinalsSuite.scala @@ -104,4 +104,22 @@ class SubstituteUnresolvedOrdinalsSuite extends AnalysisTest { testRelationWithData.groupBy(Literal(1))(Literal(100).as("a")) ) } + + test("SPARK-47895: group by alias repeated analysis") { + val plan = testRelation.groupBy($"b")(Literal(100).as("b")).analyze + comparePlans( + plan, + testRelation.groupBy(Literal(1))(Literal(100).as("b")) + ) + + val testRelationWithData = testRelation.copy(data = Seq(new GenericInternalRow(Array(1: Any)))) + // Copy the plan to reset its `analyzed` flag, so that analyzer rules will re-apply. + val copiedPlan = plan.transform { + case _: LocalRelation => testRelationWithData + } + comparePlans( + copiedPlan.analyze, // repeated analysis + testRelationWithData.groupBy(Literal(1))(Literal(100).as("b")) + ) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org