This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new aa22199 [SPARK-35665][SQL] Resolve UnresolvedAlias in CollectMetrics aa22199 is described below commit aa221991b3f0f1b7c91e87337f9eb5d164167f5a Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Mon Jun 7 21:05:11 2021 +0900 [SPARK-35665][SQL] Resolve UnresolvedAlias in CollectMetrics ### What changes were proposed in this pull request? It's a long-standing bug that we forgot to resolve `UnresolvedAlias` in `CollectMetrics`. It's a bit hard to trigger this bug before 3.2 as most likely people won't create `UnresolvedAlias` when calling `Dataset.observe`. However, things have changed since pull request #30974 (https://github.com/apache/spark/pull/30974). This PR proposes to handle `CollectMetrics` in the rule `ResolveAliases`. ### Why are the changes needed? bug fix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated test Closes #32803 from cloud-fan/minor. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit a70e66ecfa638cacc99b4e9a7c464e41ec92ad30) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 3 +++ .../test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 600a5af..899e723 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -420,6 +420,9 @@ class Analyzer(override val catalogManager: CatalogManager) case Project(projectList, child) if child.resolved && hasUnresolvedAlias(projectList) => Project(assignAliases(projectList), 
child) + + case c: CollectMetrics if c.child.resolved && hasUnresolvedAlias(c.metrics) => + c.copy(metrics = assignAliases(c.metrics)) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala index b17c935..e3fcf8b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/util/DataFrameCallbackSuite.scala @@ -251,7 +251,8 @@ class DataFrameCallbackSuite extends QueryTest name = "my_event", min($"id").as("min_val"), max($"id").as("max_val"), - sum($"id").as("sum_val"), + // Test unresolved alias + sum($"id"), count(when($"id" % 2 === 0, 1)).as("num_even")) .observe( name = "other_event", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org