This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push: new 29dee664dd0 [SPARK-41144][SQL] Unresolved hint should not cause query failure 29dee664dd0 is described below commit 29dee664dd0d3296318d3551577c868320cbbf78 Author: ulysses-you <ulyssesyo...@gmail.com> AuthorDate: Tue Nov 15 16:49:03 2022 +0800 [SPARK-41144][SQL] Unresolved hint should not cause query failure Skip `UnresolvedHint` in rule `AddMetadataColumns` to avoid calling `exprId` on `UnresolvedAttribute`. ``` CREATE TABLE t1(c1 bigint) USING PARQUET; CREATE TABLE t2(c2 bigint) USING PARQUET; SELECT /*+ hash(t2) */ * FROM t1 join t2 on c1 = c2; ``` This query failed with the message: ``` org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to exprId on unresolved object at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.exprId(unresolved.scala:147) at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$4(Analyzer.scala:1005) at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$4$adapted(Analyzer.scala:1005) at scala.collection.Iterator.exists(Iterator.scala:969) at scala.collection.Iterator.exists$(Iterator.scala:967) at scala.collection.AbstractIterator.exists(Iterator.scala:1431) at scala.collection.IterableLike.exists(IterableLike.scala:79) at scala.collection.IterableLike.exists$(IterableLike.scala:78) at scala.collection.AbstractIterable.exists(Iterable.scala:56) at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$3(Analyzer.scala:1005) at org.apache.spark.sql.catalyst.analysis.Analyzer$AddMetadataColumns$.$anonfun$hasMetadataCol$3$adapted(Analyzer.scala:1005) ``` Before this regression, it only produced a warning: `WARN HintErrorLogger: Unrecognized hint: hash(t2)` Yes, this fixes a regression from 3.3.1. 
Note, the root cause is that we mark `UnresolvedHint` as resolved if its child is resolved since https://github.com/apache/spark/pull/32841, and then https://github.com/apache/spark/pull/37758 triggered this bug. Added a test. Closes #38662 from ulysses-you/hint. Authored-by: ulysses-you <ulyssesyo...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> (cherry picked from commit a9bf5d2b3f5b3331e3b024a3ad631fcbe88a9d18) Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 1 + sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ad40f924ef8..2a2fe6f2957 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -923,6 +923,7 @@ class Analyzer(override val catalogManager: CatalogManager) def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsDownWithPruning( AlwaysProcess.fn, ruleId) { + case hint: UnresolvedHint => hint // Add metadata output to all node types case node if node.children.nonEmpty && node.resolved && hasMetadataCol(node) => val inputAttrs = AttributeSet(node.children.flatMap(_.output)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 0a3107cdff6..5b42d05c237 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -4564,6 +4564,14 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } } + + test("SPARK-41144: Unresolved hint should not cause query failure") { + withTable("t1", "t2") { + 
sql("CREATE TABLE t1(c1 bigint) USING PARQUET") + sql("CREATE TABLE t2(c2 bigint) USING PARQUET") + sql("SELECT /*+ hash(t2) */ * FROM t1 join t2 on c1 = c2") + } + } } case class Foo(bar: Option[String]) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org