This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new be1b282  [SPARK-32237][SQL][3.0] Resolve hint in CTE
be1b282 is described below

commit be1b28209d8b6ac1f46e8899b85f7cb691a97051
Author:     LantaoJin <jinlan...@gmail.com>
AuthorDate: Fri Jul 24 03:48:16 2020 +0000

    [SPARK-32237][SQL][3.0] Resolve hint in CTE

    ### What changes were proposed in this pull request?
    This is a backport of #29062.

    This PR moves the `Substitution` batch before the `Hints` batch in `Analyzer`, so that hints inside a CTE are resolved correctly.

    ### Why are the changes needed?
    The SQL below throws an AnalysisException in Spark 3.0, although it works in Spark 2.x:
    ```sql
    WITH cte AS (SELECT /*+ REPARTITION(3) */ T.id, T.data FROM $t1 T)
    SELECT cte.id, cte.data FROM cte
    ```
    ```
    Failed to analyze query: org.apache.spark.sql.AnalysisException: cannot resolve '`cte.id`' given input columns: [cte.data, cte.id]; line 3 pos 7;
    'Project ['cte.id, 'cte.data]
    +- SubqueryAlias cte
       +- Project [id#21L, data#22]
          +- SubqueryAlias T
             +- SubqueryAlias testcat.ns1.ns2.tbl
                +- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl

    'Project ['cte.id, 'cte.data]
    +- SubqueryAlias cte
       +- Project [id#21L, data#22]
          +- SubqueryAlias T
             +- SubqueryAlias testcat.ns1.ns2.tbl
                +- RelationV2[id#21L, data#22] testcat.ns1.ns2.tbl
    ```

    ### Does this PR introduce _any_ user-facing change?
    No

    ### How was this patch tested?
    Added unit tests in AnalysisSuite and SQLQuerySuite.

    Closes #29201 from LantaoJin/SPARK-32237_branch-3.0.

    Lead-authored-by: LantaoJin <jinlan...@gmail.com>
    Co-authored-by: Alan Jin <jinlan...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../spark/sql/catalyst/analysis/Analyzer.scala     | 10 ++++-----
 .../sql/catalyst/analysis/AnalysisSuite.scala      | 25 +++++++++++++++++++++-
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 12 +++++++++++
 3 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index bd5a797..89454c2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -200,16 +200,16 @@ class Analyzer(
   val postHocResolutionRules: Seq[Rule[LogicalPlan]] = Nil
 
   lazy val batches: Seq[Batch] = Seq(
-    Batch("Hints", fixedPoint,
-      new ResolveHints.ResolveJoinStrategyHints(conf),
-      new ResolveHints.ResolveCoalesceHints(conf)),
-    Batch("Simple Sanity Check", Once,
-      LookupFunctions),
     Batch("Substitution", fixedPoint,
       CTESubstitution,
       WindowsSubstitution,
       EliminateUnions,
       new SubstituteUnresolvedOrdinals(conf)),
+    Batch("Hints", fixedPoint,
+      new ResolveHints.ResolveJoinStrategyHints(conf),
+      new ResolveHints.ResolveCoalesceHints(conf)),
+    Batch("Simple Sanity Check", Once,
+      LookupFunctions),
     Batch("Resolution", fixedPoint,
       ResolveTableValuedFunctions ::
       ResolveNamespace(catalogManager) ::
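Not part of the patch: the sketch below is an editor's illustration of how the batch reordering above can be exercised end to end, e.g. from spark-shell on a branch-3.0 build that contains this commit. The local session setup and the throwaway table `t` are assumptions (the table mirrors the new SQLQuerySuite test further below), and the expected partition count only holds if the REPARTITION(3) hint is actually applied.

```scala
import org.apache.spark.sql.SparkSession

// Illustrative local session; not part of the committed change.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("SPARK-32237-hint-in-cte")
  .getOrCreate()

// Throwaway table, mirroring the new SQLQuerySuite test below.
spark.sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS id")

// This query failed analysis before the change (see the exception quoted in the
// description above); with Substitution now running before Hints, it analyzes and runs.
val df = spark.sql(
  """
    |WITH cte AS (SELECT /*+ REPARTITION(3) */ * FROM t)
    |SELECT * FROM cte
  """.stripMargin)

df.show()                         // expected: a single row with id = 1
println(df.rdd.getNumPartitions)  // expected: 3 if the REPARTITION(3) hint is honored
```

The tests added below check the same scenario at the analyzer level (AnalysisSuite) and through SQL (SQLQuerySuite).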
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 8db2036..453a4e6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -25,7 +25,7 @@ import org.apache.log4j.Level
 import org.scalatest.Matchers
 
 import org.apache.spark.api.python.PythonEvalType
-import org.apache.spark.sql.catalyst.TableIdentifier
+import org.apache.spark.sql.catalyst.{AliasIdentifier, TableIdentifier}
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType, InMemoryCatalog, SessionCatalog}
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
@@ -879,4 +879,27 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       Seq("Intersect can only be performed on tables with the compatible column types. " +
         "timestamp <> double at the second column of the second table"))
   }
+
+  test("SPARK-32237: Hint in CTE") {
+    val plan = With(
+      Project(
+        Seq(UnresolvedAttribute("cte.a")),
+        UnresolvedRelation(TableIdentifier("cte"))
+      ),
+      Seq(
+        (
+          "cte",
+          SubqueryAlias(
+            AliasIdentifier("cte"),
+            UnresolvedHint(
+              "REPARTITION",
+              Seq(Literal(3)),
+              Project(testRelation.output, testRelation)
+            )
+          )
+        )
+      )
+    )
+    assertAnalysisSuccess(plan)
+  }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 6fab47d..f5dba4c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -3468,6 +3468,18 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark
     }
   }
 
+  test("SPARK-32237: Hint in CTE") {
+    withTable("t") {
+      sql("CREATE TABLE t USING PARQUET AS SELECT 1 AS id")
+      checkAnswer(
+        sql("""
+          |WITH cte AS (SELECT /*+ REPARTITION(3) */ * FROM t)
+          |SELECT * FROM cte
+        """.stripMargin),
+        Row(1) :: Nil)
+    }
+  }
+
   test("SPARK-32372: ResolveReferences.dedupRight should only rewrite attributes for ancestor " +
     "plans of the conflict plan") {
     sql("SELECT name, avg(age) as avg_age FROM person GROUP BY name")

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org