This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new fc4643be31a [SPARK-39783][SQL] Quote qualifiedName to fix backticks for column candidates in error messages fc4643be31a is described below commit fc4643be31a463cc1096d37f71548f39e99ace32 Author: Enrico Minack <git...@enrico.minack.dev> AuthorDate: Tue Oct 18 15:04:55 2022 +0500 [SPARK-39783][SQL] Quote qualifiedName to fix backticks for column candidates in error messages ### What changes were proposed in this pull request? The `NamedExpression.qualifiedName` is a concatenation of qualifiers and the name, joined by `dots`. If those contain `dots`, the result `qualifiedName` is ambiguous. Quoting those if they contain `dots` fixes this, while this also fixes quoting column candidates in the error messages `UNRESOLVED_COLUMN` and `UNRESOLVED_MAP_KEY`: `UNRESOLVED_COLUMN`: ``` Seq((0)).toDF("the.id").select("the.id").show() ``` The error message should read org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN] A column or function parameter with name `the`.`id` cannot be resolved. Did you mean one of the following? [`the.id`]; while it was: org.apache.spark.sql.AnalysisException: [UNRESOLVED_COLUMN] A column or function parameter with name `the`.`id` cannot be resolved. Did you mean one of the following? [`the`.`id`]; `UNRESOLVED_MAP_KEY`: ``` Seq((0)).toDF("id") .select(map(lit("key"), lit(1)).as("map"), lit(2).as("other.column")) .select($"`map`"($"nonexisting")).show() ``` The error message should read Cannot resolve column `nonexisting` as a map key. If the key is a string literal, please add single quotes around it. Otherwise did you mean one of the following column(s)? [`map`, `other.column`]; while it was: Cannot resolve column `nonexisting` as a map key. If the key is a string literal, please add single quotes around it. Otherwise did you mean one of the following column(s)? [`map`, `other`.`column`]; ### Why are the changes needed? The current quoting is wrong and `qualifiedName` is ambiguous if `name` or `qualifiers` contain `dots`. ### Does this PR introduce _any_ user-facing change? It corrects the error message. ### How was this patch tested? This is tested in `AnalysisErrorSuite`, `DatasetSuite` and `QueryCompilationErrorsSuite.scala`. Closes #38256 from EnricoMi/branch-correct-backticks-error-message. Authored-by: Enrico Minack <git...@enrico.minack.dev> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../catalyst/expressions/namedExpressions.scala | 8 ++-- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++++++++ .../sql/catalyst/analysis/TestRelations.scala | 2 + .../scala/org/apache/spark/sql/DatasetSuite.scala | 45 ++++++++++++++++++++++ .../org/apache/spark/sql/DatasetUnpivotSuite.scala | 3 +- .../sql/errors/QueryCompilationErrorsSuite.scala | 16 ++++++++ 7 files changed, 89 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 441e696bfb8..b185b38797b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -3479,7 +3479,7 @@ class Analyzer(override val catalogManager: CatalogManager) i.userSpecifiedCols.map { col => i.table.resolve(Seq(col), resolver).getOrElse { - val candidates = i.table.output.map(_.name) + val candidates = i.table.output.map(_.qualifiedName) val orderedCandidates = StringUtils.orderStringsBySimilarity(col, candidates) throw QueryCompilationErrors .unresolvedAttributeError("UNRESOLVED_COLUMN", col, orderedCandidates, i.origin) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala index 4181edcb8c6..99e5f411bdb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala @@ -71,11 +71,11 @@ trait NamedExpression extends Expression { def exprId: ExprId /** - * Returns a dot separated fully qualified name for this attribute. Given that there can be - * multiple qualifiers, it is possible that there are other possible way to refer to this - * attribute. + * Returns a dot separated fully qualified name for this attribute. If the name or any qualifier + * contains `dots`, it is quoted to avoid confusion. Given that there can be multiple qualifiers, + * it is possible that there are other possible way to refer to this attribute. */ - def qualifiedName: String = (qualifier :+ name).mkString(".") + def qualifiedName: String = (qualifier :+ name).map(quoteIfNeeded).mkString(".") /** * Optional qualifier for the expression. diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index a4bce2f2a6c..8b71bb05550 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -881,6 +881,26 @@ class AnalysisErrorSuite extends AnalysisTest { "[`a`, `b`, `c`, `d`, `e`]" :: Nil) + errorClassTest( + "SPARK-39783: backticks in error message for candidate column with dots", + // This selects a column that does not exist, + // the error message suggest the existing column with correct backticks + testRelation6.select($"`the`.`id`"), + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters = Map( + "objectName" -> "`the`.`id`", + "proposal" -> "`the.id`")) + + errorClassTest( + "SPARK-39783: backticks in error message for candidate struct column", + // This selects a column that does not exist, + // the error message suggest the existing column with correct backticks + nestedRelation2.select($"`top.aField`"), + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + messageParameters = Map( + "objectName" -> "`top.aField`", + "proposal" -> "`top`")) + test("SPARK-35080: Unsupported correlated equality predicates in subquery") { val a = AttributeReference("a", IntegerType)() val b = AttributeReference("b", IntegerType)() diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala index 33b60290709..d54237fcc14 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TestRelations.scala @@ -46,6 +46,8 @@ object TestRelations { val testRelation5 = LocalRelation(AttributeReference("i", StringType)()) + val testRelation6 = LocalRelation(AttributeReference("the.id", LongType)()) + val nestedRelation = LocalRelation( AttributeReference("top", StructType( StructField("duplicateField", StringType) :: diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 88ad54dc7b4..8f5740e65ed 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1994,6 +1994,51 @@ class DatasetSuite extends QueryTest } } + test("SPARK-39783: Fix error messages for columns with dots/periods") { + forAll(dotColumnTestModes) { (caseSensitive, colName) => + val ds = Seq(SpecialCharClass("1", "2")).toDS + withSQLConf(SQLConf.CASE_SENSITIVE.key -> caseSensitive) { + checkError( + exception = intercept[AnalysisException] { + // Note: ds(colName) "SPARK-25153: Improve error messages for columns with dots/periods" + // has different semantics than ds.select(colName) + ds.select(colName) + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = None, + parameters = Map( + "objectName" -> s"`${colName.replace(".", "`.`")}`", + "proposal" -> "`field.1`, `field 2`")) + } + } + } + + test("SPARK-39783: backticks in error message for candidate column with dots") { + checkError( + exception = intercept[AnalysisException] { + Seq(0).toDF("the.id").select("the.id") + }, + errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", + sqlState = None, + parameters = Map( + "objectName" -> "`the`.`id`", + "proposal" -> "`the.id`")) + } + + test("SPARK-39783: backticks in error message for map candidate key with dots") { + checkError( + exception = intercept[AnalysisException] { + spark.range(1) + .select(map(lit("key"), lit(1)).as("map"), lit(2).as("other.column")) + .select($"`map`"($"nonexisting")).show() + }, + errorClass = "UNRESOLVED_MAP_KEY.WITH_SUGGESTION", + sqlState = None, + parameters = Map( + "objectName" -> "`nonexisting`", + "proposal" -> "`map`, `other.column`")) + } + test("groupBy.as") { val df1 = Seq(DoubleData(1, "one"), DoubleData(2, "two"), DoubleData(3, "three")).toDS() .repartition($"id").sortWithinPartitions("id") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala index f2f31851acb..3c05a7415a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetUnpivotSuite.scala @@ -504,10 +504,9 @@ class DatasetUnpivotSuite extends QueryTest checkError( exception = e, errorClass = "UNRESOLVED_COLUMN.WITH_SUGGESTION", - // expected message is wrong: https://issues.apache.org/jira/browse/SPARK-39783 parameters = Map( "objectName" -> "`an`.`id`", - "proposal" -> "`an`.`id`, `int1`, `long1`, `str`.`one`, `str`.`two`")) + "proposal" -> "`an.id`, `int1`, `long1`, `str.one`, `str.two`")) } test("unpivot with struct fields") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 75c157d4b88..8086a0e97ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -419,6 +419,22 @@ class QueryCompilationErrorsSuite ) } + test("UNRESOLVED_MAP_KEY: proposal columns containing quoted dots") { + val query = "select m[a] from (select map('a', 'b') as m, 'aa' as `a.a`)" + checkError( + exception = intercept[AnalysisException] {sql(query)}, + errorClass = "UNRESOLVED_MAP_KEY.WITH_SUGGESTION", + sqlState = None, + parameters = Map("objectName" -> "`a`", + "proposal" -> + "`__auto_generated_subquery_name`.`m`, `__auto_generated_subquery_name`.`a.a`"), + context = ExpectedContext( + fragment = "a", + start = 9, + stop = 9) + ) + } + test("UNRESOLVED_COLUMN: SELECT distinct does not work correctly " + "if order by missing attribute") { checkAnswer( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org