This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1f1d7964902 [SPARK-39497][SQL] Improve the analysis exception of missing map key column 1f1d7964902 is described below commit 1f1d79649027a4c03e48dea2bcef280dca53767a Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Mon Jun 20 10:35:22 2022 -0700 [SPARK-39497][SQL] Improve the analysis exception of missing map key column ### What changes were proposed in this pull request? Sometimes users forgot to add single quotes on the map key string literal, for example `map_col[a]`. In such a case, the Analyzer will throw an exception: ``` [MISSING_COLUMN] Column 'struct.a' does not exist. Did you mean one of the following? ... ``` We can improve this message by saying that the user should append single quotes if the map key is a string literal. ``` [UNRESOLVED_MAP_KEY] Cannot resolve column 'a' as a map key. If the key is a string literal, please add single quotes around it. Otherwise, did you mean one of the following column(s)? ... ``` ### Why are the changes needed? Error message improvement ### Does this PR introduce _any_ user-facing change? Yes but trivial, an improvement on the error message of unresolved map key column ### How was this patch tested? New UT Closes #36896 from gengliangwang/unreslovedMapKey. Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- core/src/main/resources/error/error-classes.json | 8 ++++- .../spark/sql/catalyst/analysis/Analyzer.scala | 4 +-- .../sql/catalyst/analysis/CheckAnalysis.scala | 39 +++++++++++++++++----- .../expressions/complexTypeExtractors.scala | 2 +- .../spark/sql/errors/QueryCompilationErrors.scala | 9 +++-- .../sql/errors/QueryCompilationErrorsSuite.scala | 12 +++++++ 6 files changed, 58 insertions(+), 16 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index d4c0910c5ad..f9257b6c21b 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -352,6 +352,12 @@ ], "sqlState" : "42000" }, + "UNRESOLVED_MAP_KEY" : { + "message" : [ + "Cannot resolve column <columnName> as a map key. If the key is a string literal, please add single quotes around it. Otherwise, did you mean one of the following column(s)? [<proposal>]" + ], + "sqlState" : "42000" + }, "UNSUPPORTED_DATATYPE" : { "message" : [ "Unsupported data type <typeName>" @@ -556,4 +562,4 @@ ], "sqlState" : "40000" } -} \ No newline at end of file +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 931a0fcf77f..4d2dd175260 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3420,8 +3420,8 @@ class Analyzer(override val catalogManager: CatalogManager) i.userSpecifiedCols.map { col => i.table.resolve(Seq(col), resolver).getOrElse( - throw QueryCompilationErrors.unresolvedColumnError( - col, i.table.output.map(_.name), i.origin)) + throw QueryCompilationErrors.unresolvedAttributeError( + "UNRESOLVED_COLUMN", col, i.table.output.map(_.name), i.origin)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index f9f8b590a31..759683b8c00 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -91,6 +91,26 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { } } + private def isMapWithStringKey(e: Expression): Boolean = if (e.resolved) { + e.dataType match { + case m: MapType => m.keyType.isInstanceOf[StringType] + case _ => false + } + } else { + false + } + + private def failUnresolvedAttribute( + operator: LogicalPlan, + a: Attribute, + errorClass: String): Nothing = { + val missingCol = a.sql + val candidates = operator.inputSet.toSeq.map(_.qualifiedName) + val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates) + throw QueryCompilationErrors.unresolvedAttributeError( + errorClass, missingCol, orderedCandidates, a.origin) + } + def checkAnalysis(plan: LogicalPlan): Unit = { // We transform up and order the rules so as to catch the first possible failure instead // of the result of cascading resolution failures. Inline all CTEs in the plan to help check @@ -160,11 +180,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { throw QueryCompilationErrors.commandUnsupportedInV2TableError("SHOW TABLE EXTENDED") case operator: LogicalPlan => - // Check argument data types of higher-order functions downwards first. - // If the arguments of the higher-order functions are resolved but the type check fails, - // the argument functions will not get resolved, but we should report the argument type - // check failure instead of claiming the argument functions are unresolved. operator transformExpressionsDown { + // Check argument data types of higher-order functions downwards first. + // If the arguments of the higher-order functions are resolved but the type check fails, + // the argument functions will not get resolved, but we should report the argument type + // check failure instead of claiming the argument functions are unresolved. case hof: HigherOrderFunction if hof.argumentsResolved && hof.checkArgumentDataTypes().isFailure => hof.checkArgumentDataTypes() match { @@ -172,15 +192,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog { hof.failAnalysis( s"cannot resolve '${hof.sql}' due to argument data type mismatch: $message") } + + // If an attribute can't be resolved as a map key of string type, either the key should be + // surrounded with single quotes, or there is a typo in the attribute name. + case GetMapValue(map, key: Attribute, _) if isMapWithStringKey(map) && !key.resolved => + failUnresolvedAttribute(operator, key, "UNRESOLVED_MAP_KEY") } getAllExpressions(operator).foreach(_.foreachUp { case a: Attribute if !a.resolved => - val missingCol = a.sql - val candidates = operator.inputSet.toSeq.map(_.qualifiedName) - val orderedCandidates = StringUtils.orderStringsBySimilarity(missingCol, candidates) - throw QueryCompilationErrors.unresolvedColumnError( - missingCol, orderedCandidates, a.origin) + failUnresolvedAttribute(operator, a, "UNRESOLVED_COLUMN") case s: Star => withPosition(s) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index b2db00cd2b4..198fd0cd1f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -461,7 +461,7 @@ case class GetMapValue( @transient private lazy val ordering: Ordering[Any] = TypeUtils.getInterpretedOrdering(keyType) - private def keyType = child.dataType.asInstanceOf[MapType].keyType + private[catalyst] def keyType = child.dataType.asInstanceOf[MapType].keyType override def checkInputDataTypes(): TypeCheckResult = { super.checkInputDataTypes() match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 4ee53c56f69..7ed5c785771 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -144,11 +144,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { s"side of the join. The $side-side columns: [${plan.output.map(_.name).mkString(", ")}]") } - def unresolvedColumnError( - colName: String, candidates: Seq[String], origin: Origin): Throwable = { + def unresolvedAttributeError( + errorClass: String, + colName: String, + candidates: Seq[String], + origin: Origin): Throwable = { val candidateIds = candidates.map(candidate => toSQLId(candidate)) new AnalysisException( - errorClass = "UNRESOLVED_COLUMN", + errorClass = errorClass, messageParameters = Array(toSQLId(colName), candidateIds.mkString(", ")), origin = origin) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 06e6bec3fd1..bab5a106828 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -401,6 +401,18 @@ class QueryCompilationErrorsSuite ) } + test("UNRESOLVED_MAP_KEY: string type literal should be quoted") { + checkAnswer(sql("select m['a'] from (select map('a', 'b') as m, 'aa' as aa)"), Row("b")) + checkError( + exception = intercept[AnalysisException] { + sql("select m[a] from (select map('a', 'b') as m, 'aa' as aa)") + }, + errorClass = "UNRESOLVED_MAP_KEY", + parameters = Map("columnName" -> "`a`", + "proposal" -> + "`__auto_generated_subquery_name`.`m`, `__auto_generated_subquery_name`.`aa`")) + } + test("UNRESOLVED_COLUMN: SELECT distinct does not work correctly " + "if order by missing attribute") { checkAnswer( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org