This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 62f8ce40ddb [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class 62f8ce40ddb is described below commit 62f8ce40ddbf76ce86fd5e51cc73c67d66e12f48 Author: panbingkun <pbk1...@gmail.com> AuthorDate: Sat Nov 19 20:31:38 2022 +0300 [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class ### What changes were proposed in this pull request? The pr aims to migrate the ambiguous ref error to an error class. ### Why are the changes needed? The changes improve the error framework. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38721 from panbingkun/SPARK-41172. Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 5 + .../spark/sql/catalyst/expressions/package.scala | 5 +- .../spark/sql/errors/QueryCompilationErrors.scala | 9 ++ .../sql/catalyst/analysis/AnalysisSuite.scala | 5 +- .../catalyst/analysis/ResolveSubquerySuite.scala | 4 +- .../expressions/AttributeResolutionSuite.scala | 30 +++-- .../results/columnresolution-negative.sql.out | 135 +++++++++++++++++++-- .../sql-tests/results/postgreSQL/join.sql.out | 30 ++++- .../results/postgreSQL/select_implicit.sql.out | 45 ++++++- .../results/udf/postgreSQL/udf-join.sql.out | 30 ++++- .../udf/postgreSQL/udf-select_implicit.sql.out | 45 ++++++- .../spark/sql/DataFrameNaFunctionsSuite.scala | 42 +++++-- .../org/apache/spark/sql/DataFrameStatSuite.scala | 52 ++++++-- .../execution/command/PlanResolutionSuite.scala | 22 ++-- .../execution/datasources/orc/OrcFilterSuite.scala | 20 ++- 15 files changed, 406 insertions(+), 73 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index fe340c517a2..4da9d2f9fbc 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -5,6 +5,11 @@ ], "sqlState" : "42000" }, + "AMBIGUOUS_REFERENCE" : { + "message" : [ + "Reference <name> is ambiguous, could be: <referenceNames>." + ] + }, "ARITHMETIC_OVERFLOW" : { "message" : [ "<message>.<alternative> If necessary set <config> to \"false\" to bypass this error." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala index 7913f396120..ededac3d917 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala @@ -21,9 +21,9 @@ import java.util.Locale import com.google.common.collect.Maps -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute} import org.apache.spark.sql.catalyst.util.MetadataColumnHelper +import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.types.{StructField, StructType} /** @@ -368,8 +368,7 @@ package object expressions { case ambiguousReferences => // More than one match. - val referenceNames = ambiguousReferences.map(_.qualifiedName).mkString(", ") - throw new AnalysisException(s"Reference '$name' is ambiguous, could be: $referenceNames.") + throw QueryCompilationErrors.ambiguousReferenceError(name, ambiguousReferences) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 22b4cfdb3c6..cbdbb6adc11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1834,6 +1834,15 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "n" -> numMatches.toString)) } + def ambiguousReferenceError(name: String, ambiguousReferences: Seq[Attribute]): Throwable = { + new AnalysisException( + errorClass = "AMBIGUOUS_REFERENCE", + messageParameters = Map( + "name" -> toSQLId(name), + "referenceNames" -> + ambiguousReferences.map(ar => toSQLId(ar.qualifiedName)).sorted.mkString("[", ", ", "]"))) + } + def cannotUseIntervalTypeInTableSchemaError(): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1183", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala index 2d6b3afb749..8b303ec3bb1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala @@ -681,7 +681,7 @@ class AnalysisSuite extends AnalysisTest with Matchers { test("SPARK-34741: Avoid ambiguous reference in MergeIntoTable") { val cond = $"a" > 1 - assertAnalysisError( + assertAnalysisErrorClass( MergeIntoTable( testRelation, testRelation, @@ -690,7 +690,8 @@ class AnalysisSuite extends AnalysisTest with Matchers { Nil, Nil ), - "Reference 'a' is ambiguous" :: Nil) + "AMBIGUOUS_REFERENCE", + Map("name" -> "`a`", "referenceNames" -> "[`a`, `a`]")) } test("SPARK-24488 Generator with multiple aliases") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala index f4e5cf91188..577f663d8b1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala @@ -81,7 +81,9 @@ class ResolveSubquerySuite extends AnalysisTest { test("lateral join with ambiguous join conditions") { val plan = lateralJoin(t1, t0.select($"b"), condition = Some($"b" === 1)) - assertAnalysisError(plan, "Reference 'b' is ambiguous, could be: b, b." :: Nil) + assertAnalysisErrorClass(plan, + "AMBIGUOUS_REFERENCE", Map("name" -> "`b`", "referenceNames" -> "[`b`, `b`]") + ) } test("prefer resolving lateral subquery attributes from the inner query") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala index a3885ac77f3..71fa60b0c03 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala @@ -65,11 +65,16 @@ class AttributeResolutionSuite extends SparkFunSuite { AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "t1")), AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "ns2", "t2"))) - val ex = intercept[AnalysisException] { - attrs.resolve(Seq("a"), resolver) - } - assert(ex.getMessage.contains( - "Reference 'a' is ambiguous, could be: ns1.t1.a, ns1.ns2.t2.a.")) + checkError( + exception = intercept[AnalysisException] { + attrs.resolve(Seq("a"), resolver) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`a`", + "referenceNames" -> "[`ns1`.`ns2`.`t2`.`a`, `ns1`.`t1`.`a`]" + ) + ) } test("attribute resolution ambiguity at the qualifier level") { @@ -77,11 +82,16 @@ class AttributeResolutionSuite extends SparkFunSuite { AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "t")), AttributeReference("a", IntegerType)(qualifier = Seq("ns2", "ns1", "t"))) - val ex = intercept[AnalysisException] { - attrs.resolve(Seq("ns1", "t", "a"), resolver) - } - assert(ex.getMessage.contains( - "Reference 'ns1.t.a' is ambiguous, could be: ns1.t.a, ns2.ns1.t.a.")) + checkError( + exception = intercept[AnalysisException] { + attrs.resolve(Seq("ns1", "t", "a"), resolver) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`ns1`.`t`.`a`", + "referenceNames" -> "[`ns1`.`t`.`a`, `ns2`.`ns1`.`t`.`a`]" + ) + ) } test("attribute resolution with nested fields") { diff --git a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out index 03a3d75aa95..aafd9140385 100644 --- a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out @@ -69,7 +69,20 @@ SELECT i1 FROM t1, mydb1.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb1`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 9, + "fragment" : "i1" + } ] +} -- !query @@ -78,7 +91,20 @@ SELECT t1.i1 FROM t1, mydb1.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`t1`.`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb1`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 12, + "fragment" : "t1.i1" + } ] +} -- !query @@ -87,7 +113,20 @@ SELECT mydb1.t1.i1 FROM t1, mydb1.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'mydb1.t1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`mydb1`.`t1`.`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb1`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 18, + "fragment" : "mydb1.t1.i1" + } ] +} -- !query @@ -96,7 +135,20 @@ SELECT i1 FROM t1, mydb2.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 9, + "fragment" : "i1" + } ] +} -- !query @@ -105,7 +157,20 @@ SELECT t1.i1 FROM t1, mydb2.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`t1`.`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 12, + "fragment" : "t1.i1" + } ] +} -- !query @@ -122,7 +187,20 @@ SELECT i1 FROM t1, mydb1.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 9, + "fragment" : "i1" + } ] +} -- !query @@ -131,7 +209,20 @@ SELECT t1.i1 FROM t1, mydb1.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb1.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`t1`.`i1`", + "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 12, + "fragment" : "t1.i1" + } ] +} -- !query @@ -140,7 +231,20 @@ SELECT i1 FROM t1, mydb2.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`i1`", + "referenceNames" : "[`spark_catalog`.`mydb2`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 9, + "fragment" : "i1" + } ] +} -- !query @@ -149,7 +253,20 @@ SELECT t1.i1 FROM t1, mydb2.t1 struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, spark_catalog.mydb2.t1.i1.; line 1 pos 7 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`t1`.`i1`", + "referenceNames" : "[`spark_catalog`.`mydb2`.`t1`.`i1`, `spark_catalog`.`mydb2`.`t1`.`i1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 12, + "fragment" : "t1.i1" + } ] +} -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out index 04c1d47af92..6746efd0809 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out @@ -546,7 +546,20 @@ SELECT '' AS `xxx`, i, k, t struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'i' is ambiguous, could be: spark_catalog.default.j1_tbl.i, spark_catalog.default.j2_tbl.i.; line 1 pos 20 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`i`", + "referenceNames" : "[`spark_catalog`.`default`.`j1_tbl`.`i`, `spark_catalog`.`default`.`j2_tbl`.`i`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 21, + "stopIndex" : 21, + "fragment" : "i" + } ] +} -- !query @@ -3235,7 +3248,20 @@ select * from struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 63 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`f1`", + "referenceNames" : "[`j`.`f1`, `j`.`f1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 78, + "stopIndex" : 79, + "fragment" : "f1" + } ] +} -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out index 0b53a9024ef..1e216298d51 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out @@ -239,7 +239,20 @@ SELECT count(*) FROM test_missing_target x, test_missing_target y struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`b`", + "referenceNames" : "[`x`.`b`, `y`.`b`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 94, + "stopIndex" : 94, + "fragment" : "b" + } ] +} -- !query @@ -429,7 +442,20 @@ SELECT count(x.a) FROM test_missing_target x, test_missing_target y struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`b`", + "referenceNames" : "[`x`.`b`, `y`.`b`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 96, + "stopIndex" : 96, + "fragment" : "b" + } ] +} -- !query @@ -453,7 +479,20 @@ SELECT count(b) FROM test_missing_target x, test_missing_target y struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`b`", + "referenceNames" : "[`x`.`b`, `y`.`b`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 14, + "stopIndex" : 14, + "fragment" : "b" + } ] +} -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out index 363e5d0b117..c30321c76db 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out @@ -546,7 +546,20 @@ SELECT udf('') AS `xxx`, udf(i) AS i, udf(k), udf(t) AS t struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'i' is ambiguous, could be: spark_catalog.default.j1_tbl.i, spark_catalog.default.j2_tbl.i.; line 1 pos 29 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`i`", + "referenceNames" : "[`spark_catalog`.`default`.`j1_tbl`.`i`, `spark_catalog`.`default`.`j2_tbl`.`i`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 30, + "stopIndex" : 30, + "fragment" : "i" + } ] +} -- !query @@ -3263,7 +3276,20 @@ select * from struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 72 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`f1`", + "referenceNames" : "[`j`.`f1`, `j`.`f1`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 87, + "stopIndex" : 88, + "fragment" : "f1" + } ] +} -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out index 412013d9527..283d5a48ba4 100755 --- a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out @@ -242,7 +242,20 @@ SELECT udf(count(*)) FROM test_missing_target x, test_missing_target y struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`b`", + "referenceNames" : "[`x`.`b`, `y`.`b`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 113, + "stopIndex" : 113, + "fragment" : "b" + } ] +} -- !query @@ -432,7 +445,20 @@ SELECT udf(count(udf(x.a))) FROM test_missing_target x, test_missing_target y struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`b`", + "referenceNames" : "[`x`.`b`, `y`.`b`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 120, + "stopIndex" : 120, + "fragment" : "b" + } ] +} -- !query @@ -457,7 +483,20 @@ SELECT udf(count(udf(b))) FROM test_missing_target x, test_missing_target y struct<> -- !query output org.apache.spark.sql.AnalysisException -Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21 +{ + "errorClass" : "AMBIGUOUS_REFERENCE", + "messageParameters" : { + "name" : "`b`", + "referenceNames" : "[`x`.`b`, `y`.`b`]" + }, + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 22, + "stopIndex" : 22, + "fragment" : "b" + } ] +} -- !query diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index 8dbc57c0429..b83a8850fbe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -279,10 +279,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { val (df1, df2) = createDFsWithSameFieldsName() val joined_df = df1.join(df2, Seq("f1"), joinType = "left_outer") - val message = intercept[AnalysisException] { - joined_df.na.fill("", cols = Seq("f2")) - }.getMessage - assert(message.contains("Reference 'f2' is ambiguous")) + checkError( + exception = intercept[AnalysisException] { + joined_df.na.fill("", cols = Seq("f2")) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`f2`", + "referenceNames" -> "[`f2`, `f2`]" + ) + ) } test("fill with col(*)") { @@ -397,10 +403,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { val df = left.join(right, Seq("col1")) // If column names are specified, the following fails due to ambiguity. - val exception = intercept[AnalysisException] { - df.na.fill("hello", Seq("col2")) - } - assert(exception.getMessage.contains("Reference 'col2' is ambiguous")) + checkError( + exception = intercept[AnalysisException] { + df.na.fill("hello", Seq("col2")) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`col2`", + "referenceNames" -> "[`col2`, `col2`]" + ) + ) // If column names are not specified, fill() is applied to all the eligible columns. checkAnswer( @@ -414,10 +426,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { val df = left.join(right, Seq("col1")) // If column names are specified, the following fails due to ambiguity. - val exception = intercept[AnalysisException] { - df.na.drop("any", Seq("col2")) - } - assert(exception.getMessage.contains("Reference 'col2' is ambiguous")) + checkError( + exception = intercept[AnalysisException] { + df.na.drop("any", Seq("col2")) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`col2`", + "referenceNames" -> "[`col2`, `col2`]" + ) + ) // If column names are not specified, drop() is applied to all the eligible rows. checkAnswer( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala index ceb1a75e83d..47ff942e5ca 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala @@ -138,18 +138,46 @@ class DataFrameStatSuite extends QueryTest with SharedSparkSession { assert(dfx.stat.freqItems(Array("table1.num", "table2.num")).collect()(0).length == 2) // this should throw "Reference 'num' is ambiguous" - intercept[AnalysisException] { - dfx.stat.freqItems(Array("num")) - } - intercept[AnalysisException] { - dfx.stat.approxQuantile("num", Array(0.1), 0.0) - } - intercept[AnalysisException] { - dfx.stat.cov("num", "num") - } - intercept[AnalysisException] { - dfx.stat.corr("num", "num") - } + checkError( + exception = intercept[AnalysisException] { + dfx.stat.freqItems(Array("num")) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`num`", + "referenceNames" -> "[`table1`.`num`, `table2`.`num`]" + ) + ) + checkError( + exception = intercept[AnalysisException] { + dfx.stat.approxQuantile("num", Array(0.1), 0.0) + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`num`", + "referenceNames" -> "[`table1`.`num`, `table2`.`num`]" + ) + ) + checkError( + exception = intercept[AnalysisException] { + dfx.stat.cov("num", "num") + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`num`", + "referenceNames" -> "[`table1`.`num`, `table2`.`num`]" + ) + ) + checkError( + exception = intercept[AnalysisException] { + dfx.stat.corr("num", "num") + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`num`", + "referenceNames" -> "[`table1`.`num`, `table2`.`num`]" + ) + ) } test("SPARK-40933 test cov & corr with null values and empty dataset") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala index 80f258c4659..3202ef728e5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala @@ -1929,10 +1929,14 @@ class PlanResolutionSuite extends AnalysisTest { // no aliases Seq(("v2Table", "v2Table1"), ("testcat.tab", "testcat.tab1")).foreach { pair => + def referenceNames(target: String, column: String): String = target match { + case "v2Table" => s"[`spark_catalog`.`default`.`v2Table1`.`$column`, " + + s"`spark_catalog`.`default`.`v2Table`.`$column`]" + case "testcat.tab" => s"[`testcat`.`tab1`.`$column`, `testcat`.`tab`.`$column`]" + } val target = pair._1 val source = pair._2 - val sql1 = s""" |MERGE INTO $target @@ -1986,8 +1990,8 @@ class PlanResolutionSuite extends AnalysisTest { // resolve column `i` as it's ambiguous. checkError( exception = intercept[AnalysisException](parseAndResolve(sql2)), - errorClass = null, - parameters = Map.empty, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map("name" -> "`i`", "referenceNames" -> referenceNames(target, "i")), context = ExpectedContext( fragment = "i", start = 22 + target.length + source.length, @@ -2002,8 +2006,8 @@ class PlanResolutionSuite extends AnalysisTest { // resolve column `s` as it's ambiguous. checkError( exception = intercept[AnalysisException](parseAndResolve(sql3)), - errorClass = null, - parameters = Map.empty, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map("name" -> "`s`", "referenceNames" -> referenceNames(target, "s")), context = ExpectedContext( fragment = "s", start = 46 + target.length + source.length, @@ -2018,8 +2022,8 @@ class PlanResolutionSuite extends AnalysisTest { // resolve column `s` as it's ambiguous. checkError( exception = intercept[AnalysisException](parseAndResolve(sql4)), - errorClass = null, - parameters = Map.empty, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map("name" -> "`s`", "referenceNames" -> referenceNames(target, "s")), context = ExpectedContext( fragment = "s", start = 46 + target.length + source.length, @@ -2034,8 +2038,8 @@ class PlanResolutionSuite extends AnalysisTest { // resolve column `s` as it's ambiguous. checkError( exception = intercept[AnalysisException](parseAndResolve(sql5)), - errorClass = null, - parameters = Map.empty, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map("name" -> "`s`", "referenceNames" -> referenceNames(target, "s")), context = ExpectedContext( fragment = "s", start = 61 + target.length + source.length, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala index aa0051a54af..a7f9da84c1f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala @@ -674,11 +674,21 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { // Exception thrown for ambiguous case. withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - val e = intercept[AnalysisException] { - sql(s"select a from $tableName where a < 0").collect() - } - assert(e.getMessage.contains( - "Reference 'a' is ambiguous")) + checkError( + exception = intercept[AnalysisException] { + sql(s"select a from $tableName where a < 0").collect() + }, + errorClass = "AMBIGUOUS_REFERENCE", + parameters = Map( + "name" -> "`a`", + "referenceNames" -> ("[`spark_catalog`.`default`.`spark_32622`.`a`, " + + "`spark_catalog`.`default`.`spark_32622`.`a`]")), + context = ExpectedContext( + fragment = "a", + start = 32, + stop = 32 + ) + ) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org