This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.1 by this push: new c8bf22e [SPARK-34003][SQL] Fix Rule conflicts between PaddingAndLengthCheckForCharVarchar and ResolveAggregateFunctions c8bf22e is described below commit c8bf22e9f7b73cd6f8873c8ea466c59361335ae5 Author: Kent Yao <y...@apache.org> AuthorDate: Fri Jan 8 09:05:22 2021 +0000 [SPARK-34003][SQL] Fix Rule conflicts between PaddingAndLengthCheckForCharVarchar and ResolveAggregateFunctions ### What changes were proposed in this pull request? ResolveAggregateFunctions is a hacky rule and it calls `executeSameContext` to generate a `resolved agg` to determine which unresolved sort attribute should be pushed into the agg. However, after we add the PaddingAndLengthCheckForCharVarchar rule which will rewrite the query output, thus, the `resolved agg` cannot match original attributes anymore. It causes some dissociative sort attribute to be pushed in and fails the query ``` logtalk [info] Failed to analyze query: org.apache.spark.sql.AnalysisException: expression 'testcat.t1.`v`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 [info] [info] Project [v#14, sum(i)#11L] [info] +- Sort [aggOrder#12 ASC NULLS FIRST], true [info] +- !Aggregate [v#14], [v#14, sum(cast(i#7 as bigint)) AS sum(i)#11L, v#13 AS aggOrder#12] [info] +- SubqueryAlias testcat.t1 [info] +- Project [if ((length(v#6) <= 3)) v#6 else if ((length(rtrim(v#6, None)) > 3)) cast(raise_error(concat(input string of length , cast(length(v#6) as string), exceeds varchar type length limitation: 3)) as string) else rpad(rtrim(v#6, None), 3, ) AS v#14, i#7] [info] +- RelationV2[v#6, i#7, index#15, _partition#16] testcat.t1 ``` ### Why are the changes needed? bugfix ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? new tests Closes #31027 from yaooqinn/SPARK-34003. Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit 0f8e5dd445b03161a27893ba714db57919d8bcab) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 14 ++++++++++---- .../scala/org/apache/spark/sql/CharVarcharTestSuite.scala | 8 ++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index d138ff3..7a3b8b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2323,16 +2323,22 @@ class Analyzer(override val catalogManager: CatalogManager) // to push down this ordering expression and can reference the original aggregate // expression instead. val needsPushDown = ArrayBuffer.empty[NamedExpression] - val evaluatedOrderings = resolvedAliasedOrdering.zip(unresolvedSortOrders).map { - case (evaluated, order) => + val orderToAlias = unresolvedSortOrders.zip(aliasedOrdering) + val evaluatedOrderings = resolvedAliasedOrdering.zip(orderToAlias).map { + case (evaluated, (order, aliasOrder)) => val index = originalAggExprs.indexWhere { case Alias(child, _) => child semanticEquals evaluated.child case other => other semanticEquals evaluated.child } if (index == -1) { - needsPushDown += evaluated - order.copy(child = evaluated.toAttribute) + if (CharVarcharUtils.getRawType(evaluated.metadata).nonEmpty) { + needsPushDown += aliasOrder + order.copy(child = aliasOrder) + } else { + needsPushDown += evaluated + order.copy(child = evaluated.toAttribute) + } } else { order.copy(child = originalAggExprs(index).toAttribute) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala index d20cee0..fb35d6c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CharVarcharTestSuite.scala @@ -466,6 +466,14 @@ trait CharVarcharTestSuite extends QueryTest with SQLTestUtils { Row("c")) } } + + test("SPARK-34003: fix char/varchar fails w/ both group by and order by ") { + withTable("t") { + sql(s"CREATE TABLE t(v VARCHAR(3), i INT) USING $format") + sql("INSERT INTO t VALUES ('c', 1)") + checkAnswer(sql("SELECT v, sum(i) FROM t GROUP BY v ORDER BY v"), Row("c", 1)) + } + } } // Some basic char/varchar tests which doesn't rely on table implementation. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org