[spark] branch master updated: [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class

maxgekk Sat, 19 Nov 2022 09:32:00 -0800

This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 62f8ce40ddb [SPARK-41172][SQL] Migrate the ambiguous ref error to an 
error class
62f8ce40ddb is described below

commit 62f8ce40ddbf76ce86fd5e51cc73c67d66e12f48
Author: panbingkun <pbk1...@gmail.com>
AuthorDate: Sat Nov 19 20:31:38 2022 +0300

    [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class
    
    ### What changes were proposed in this pull request?
    This PR migrates the ambiguous reference error to the `AMBIGUOUS_REFERENCE` error class.
    
    ### Why are the changes needed?
    The changes improve the error framework.
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    Passes GA (GitHub Actions).
    
    Closes #38721 from panbingkun/SPARK-41172.
    
    Authored-by: panbingkun <pbk1...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   |   5 +
 .../spark/sql/catalyst/expressions/package.scala   |   5 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  |   9 ++
 .../sql/catalyst/analysis/AnalysisSuite.scala      |   5 +-
 .../catalyst/analysis/ResolveSubquerySuite.scala   |   4 +-
 .../expressions/AttributeResolutionSuite.scala     |  30 +++--
 .../results/columnresolution-negative.sql.out      | 135 +++++++++++++++++++--
 .../sql-tests/results/postgreSQL/join.sql.out      |  30 ++++-
 .../results/postgreSQL/select_implicit.sql.out     |  45 ++++++-
 .../results/udf/postgreSQL/udf-join.sql.out        |  30 ++++-
 .../udf/postgreSQL/udf-select_implicit.sql.out     |  45 ++++++-
 .../spark/sql/DataFrameNaFunctionsSuite.scala      |  42 +++++--
 .../org/apache/spark/sql/DataFrameStatSuite.scala  |  52 ++++++--
 .../execution/command/PlanResolutionSuite.scala    |  22 ++--
 .../execution/datasources/orc/OrcFilterSuite.scala |  20 ++-
 15 files changed, 406 insertions(+), 73 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index fe340c517a2..4da9d2f9fbc 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -5,6 +5,11 @@
     ],
     "sqlState" : "42000"
   },
+  "AMBIGUOUS_REFERENCE" : {
+    "message" : [
+      "Reference <name> is ambiguous, could be: <referenceNames>."
+    ]
+  },
   "ARITHMETIC_OVERFLOW" : {
     "message" : [
       "<message>.<alternative> If necessary set <config> to \"false\" to 
bypass this error."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
index 7913f396120..ededac3d917 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/package.scala
@@ -21,9 +21,9 @@ import java.util.Locale
 
 import com.google.common.collect.Maps
 
-import org.apache.spark.sql.AnalysisException
 import org.apache.spark.sql.catalyst.analysis.{Resolver, UnresolvedAttribute}
 import org.apache.spark.sql.catalyst.util.MetadataColumnHelper
+import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.types.{StructField, StructType}
 
 /**
@@ -368,8 +368,7 @@ package object expressions  {
 
         case ambiguousReferences =>
           // More than one match.
-          val referenceNames = 
ambiguousReferences.map(_.qualifiedName).mkString(", ")
-          throw new AnalysisException(s"Reference '$name' is ambiguous, could 
be: $referenceNames.")
+          throw QueryCompilationErrors.ambiguousReferenceError(name, 
ambiguousReferences)
       }
     }
   }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 22b4cfdb3c6..cbdbb6adc11 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1834,6 +1834,15 @@ private[sql] object QueryCompilationErrors extends 
QueryErrorsBase {
         "n" -> numMatches.toString))
   }
 
+  def ambiguousReferenceError(name: String, ambiguousReferences: 
Seq[Attribute]): Throwable = {
+    new AnalysisException(
+      errorClass = "AMBIGUOUS_REFERENCE",
+      messageParameters = Map(
+        "name" -> toSQLId(name),
+        "referenceNames" ->
+          ambiguousReferences.map(ar => 
toSQLId(ar.qualifiedName)).sorted.mkString("[", ", ", "]")))
+  }
+
   def cannotUseIntervalTypeInTableSchemaError(): Throwable = {
     new AnalysisException(
       errorClass = "_LEGACY_ERROR_TEMP_1183",
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index 2d6b3afb749..8b303ec3bb1 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -681,7 +681,7 @@ class AnalysisSuite extends AnalysisTest with Matchers {
 
   test("SPARK-34741: Avoid ambiguous reference in MergeIntoTable") {
     val cond = $"a" > 1
-    assertAnalysisError(
+    assertAnalysisErrorClass(
       MergeIntoTable(
         testRelation,
         testRelation,
@@ -690,7 +690,8 @@ class AnalysisSuite extends AnalysisTest with Matchers {
         Nil,
         Nil
       ),
-      "Reference 'a' is ambiguous" :: Nil)
+      "AMBIGUOUS_REFERENCE",
+      Map("name" -> "`a`", "referenceNames" -> "[`a`, `a`]"))
   }
 
   test("SPARK-24488 Generator with multiple aliases") {
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
index f4e5cf91188..577f663d8b1 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveSubquerySuite.scala
@@ -81,7 +81,9 @@ class ResolveSubquerySuite extends AnalysisTest {
 
   test("lateral join with ambiguous join conditions") {
     val plan = lateralJoin(t1, t0.select($"b"), condition = Some($"b" ===  1))
-    assertAnalysisError(plan, "Reference 'b' is ambiguous, could be: b, b." :: 
Nil)
+    assertAnalysisErrorClass(plan,
+      "AMBIGUOUS_REFERENCE", Map("name" -> "`b`", "referenceNames" -> "[`b`, 
`b`]")
+    )
   }
 
   test("prefer resolving lateral subquery attributes from the inner query") {
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
index a3885ac77f3..71fa60b0c03 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/AttributeResolutionSuite.scala
@@ -65,11 +65,16 @@ class AttributeResolutionSuite extends SparkFunSuite {
       AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "t1")),
       AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "ns2", 
"t2")))
 
-    val ex = intercept[AnalysisException] {
-      attrs.resolve(Seq("a"), resolver)
-    }
-    assert(ex.getMessage.contains(
-      "Reference 'a' is ambiguous, could be: ns1.t1.a, ns1.ns2.t2.a."))
+    checkError(
+      exception = intercept[AnalysisException] {
+        attrs.resolve(Seq("a"), resolver)
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`a`",
+        "referenceNames" -> "[`ns1`.`ns2`.`t2`.`a`, `ns1`.`t1`.`a`]"
+      )
+    )
   }
 
   test("attribute resolution ambiguity at the qualifier level") {
@@ -77,11 +82,16 @@ class AttributeResolutionSuite extends SparkFunSuite {
       AttributeReference("a", IntegerType)(qualifier = Seq("ns1", "t")),
       AttributeReference("a", IntegerType)(qualifier = Seq("ns2", "ns1", "t")))
 
-    val ex = intercept[AnalysisException] {
-      attrs.resolve(Seq("ns1", "t", "a"), resolver)
-    }
-    assert(ex.getMessage.contains(
-      "Reference 'ns1.t.a' is ambiguous, could be: ns1.t.a, ns2.ns1.t.a."))
+    checkError(
+      exception = intercept[AnalysisException] {
+        attrs.resolve(Seq("ns1", "t", "a"), resolver)
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`ns1`.`t`.`a`",
+        "referenceNames" -> "[`ns1`.`t`.`a`, `ns2`.`ns1`.`t`.`a`]"
+      )
+    )
   }
 
   test("attribute resolution with nested fields") {
diff --git 
a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
index 03a3d75aa95..aafd9140385 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/columnresolution-negative.sql.out
@@ -69,7 +69,20 @@ SELECT i1 FROM t1, mydb1.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, 
spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb1`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 9,
+    "fragment" : "i1"
+  } ]
+}
 
 
 -- !query
@@ -78,7 +91,20 @@ SELECT t1.i1 FROM t1, mydb1.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, 
spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`t1`.`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb1`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 12,
+    "fragment" : "t1.i1"
+  } ]
+}
 
 
 -- !query
@@ -87,7 +113,20 @@ SELECT mydb1.t1.i1 FROM t1, mydb1.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'mydb1.t1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, 
spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`mydb1`.`t1`.`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb1`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 18,
+    "fragment" : "mydb1.t1.i1"
+  } ]
+}
 
 
 -- !query
@@ -96,7 +135,20 @@ SELECT i1 FROM t1, mydb2.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, 
spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb2`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 9,
+    "fragment" : "i1"
+  } ]
+}
 
 
 -- !query
@@ -105,7 +157,20 @@ SELECT t1.i1 FROM t1, mydb2.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb1.t1.i1, 
spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`t1`.`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb2`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 12,
+    "fragment" : "t1.i1"
+  } ]
+}
 
 
 -- !query
@@ -122,7 +187,20 @@ SELECT i1 FROM t1, mydb1.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, 
spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb2`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 9,
+    "fragment" : "i1"
+  } ]
+}
 
 
 -- !query
@@ -131,7 +209,20 @@ SELECT t1.i1 FROM t1, mydb1.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, 
spark_catalog.mydb1.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`t1`.`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb1`.`t1`.`i1`, 
`spark_catalog`.`mydb2`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 12,
+    "fragment" : "t1.i1"
+  } ]
+}
 
 
 -- !query
@@ -140,7 +231,20 @@ SELECT i1 FROM t1, mydb2.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, 
spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb2`.`t1`.`i1`, 
`spark_catalog`.`mydb2`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 9,
+    "fragment" : "i1"
+  } ]
+}
 
 
 -- !query
@@ -149,7 +253,20 @@ SELECT t1.i1 FROM t1, mydb2.t1
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 't1.i1' is ambiguous, could be: spark_catalog.mydb2.t1.i1, 
spark_catalog.mydb2.t1.i1.; line 1 pos 7
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`t1`.`i1`",
+    "referenceNames" : "[`spark_catalog`.`mydb2`.`t1`.`i1`, 
`spark_catalog`.`mydb2`.`t1`.`i1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 12,
+    "fragment" : "t1.i1"
+  } ]
+}
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out
index 04c1d47af92..6746efd0809 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/join.sql.out
@@ -546,7 +546,20 @@ SELECT '' AS `xxx`, i, k, t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'i' is ambiguous, could be: spark_catalog.default.j1_tbl.i, 
spark_catalog.default.j2_tbl.i.; line 1 pos 20
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`i`",
+    "referenceNames" : "[`spark_catalog`.`default`.`j1_tbl`.`i`, 
`spark_catalog`.`default`.`j2_tbl`.`i`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 21,
+    "stopIndex" : 21,
+    "fragment" : "i"
+  } ]
+}
 
 
 -- !query
@@ -3235,7 +3248,20 @@ select * from
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 63
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`f1`",
+    "referenceNames" : "[`j`.`f1`, `j`.`f1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 78,
+    "stopIndex" : 79,
+    "fragment" : "f1"
+  } ]
+}
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
index 0b53a9024ef..1e216298d51 100755
--- 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select_implicit.sql.out
@@ -239,7 +239,20 @@ SELECT count(*) FROM test_missing_target x, 
test_missing_target y
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`b`",
+    "referenceNames" : "[`x`.`b`, `y`.`b`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 94,
+    "stopIndex" : 94,
+    "fragment" : "b"
+  } ]
+}
 
 
 -- !query
@@ -429,7 +442,20 @@ SELECT count(x.a) FROM test_missing_target x, 
test_missing_target y
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 10
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`b`",
+    "referenceNames" : "[`x`.`b`, `y`.`b`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 96,
+    "stopIndex" : 96,
+    "fragment" : "b"
+  } ]
+}
 
 
 -- !query
@@ -453,7 +479,20 @@ SELECT count(b) FROM test_missing_target x, 
test_missing_target y
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 13
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`b`",
+    "referenceNames" : "[`x`.`b`, `y`.`b`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 14,
+    "stopIndex" : 14,
+    "fragment" : "b"
+  } ]
+}
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out
index 363e5d0b117..c30321c76db 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-join.sql.out
@@ -546,7 +546,20 @@ SELECT udf('') AS `xxx`, udf(i) AS i, udf(k), udf(t) AS t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'i' is ambiguous, could be: spark_catalog.default.j1_tbl.i, 
spark_catalog.default.j2_tbl.i.; line 1 pos 29
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`i`",
+    "referenceNames" : "[`spark_catalog`.`default`.`j1_tbl`.`i`, 
`spark_catalog`.`default`.`j2_tbl`.`i`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 30,
+    "stopIndex" : 30,
+    "fragment" : "i"
+  } ]
+}
 
 
 -- !query
@@ -3263,7 +3276,20 @@ select * from
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'f1' is ambiguous, could be: j.f1, j.f1.; line 2 pos 72
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`f1`",
+    "referenceNames" : "[`j`.`f1`, `j`.`f1`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 87,
+    "stopIndex" : 88,
+    "fragment" : "f1"
+  } ]
+}
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
index 412013d9527..283d5a48ba4 100755
--- 
a/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/udf/postgreSQL/udf-select_implicit.sql.out
@@ -242,7 +242,20 @@ SELECT udf(count(*)) FROM test_missing_target x, 
test_missing_target y
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`b`",
+    "referenceNames" : "[`x`.`b`, `y`.`b`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 113,
+    "stopIndex" : 113,
+    "fragment" : "b"
+  } ]
+}
 
 
 -- !query
@@ -432,7 +445,20 @@ SELECT udf(count(udf(x.a))) FROM test_missing_target x, 
test_missing_target y
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 3 pos 14
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`b`",
+    "referenceNames" : "[`x`.`b`, `y`.`b`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 120,
+    "stopIndex" : 120,
+    "fragment" : "b"
+  } ]
+}
 
 
 -- !query
@@ -457,7 +483,20 @@ SELECT udf(count(udf(b))) FROM test_missing_target x, 
test_missing_target y
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Reference 'b' is ambiguous, could be: x.b, y.b.; line 1 pos 21
+{
+  "errorClass" : "AMBIGUOUS_REFERENCE",
+  "messageParameters" : {
+    "name" : "`b`",
+    "referenceNames" : "[`x`.`b`, `y`.`b`]"
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 22,
+    "stopIndex" : 22,
+    "fragment" : "b"
+  } ]
+}
 
 
 -- !query
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
index 8dbc57c0429..b83a8850fbe 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala
@@ -279,10 +279,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with 
SharedSparkSession {
     val (df1, df2) = createDFsWithSameFieldsName()
     val joined_df = df1.join(df2, Seq("f1"), joinType = "left_outer")
 
-    val message = intercept[AnalysisException] {
-      joined_df.na.fill("", cols = Seq("f2"))
-    }.getMessage
-    assert(message.contains("Reference 'f2' is ambiguous"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        joined_df.na.fill("", cols = Seq("f2"))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`f2`",
+        "referenceNames" -> "[`f2`, `f2`]"
+      )
+    )
   }
 
   test("fill with col(*)") {
@@ -397,10 +403,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with 
SharedSparkSession {
     val df = left.join(right, Seq("col1"))
 
     // If column names are specified, the following fails due to ambiguity.
-    val exception = intercept[AnalysisException] {
-      df.na.fill("hello", Seq("col2"))
-    }
-    assert(exception.getMessage.contains("Reference 'col2' is ambiguous"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.na.fill("hello", Seq("col2"))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`col2`",
+        "referenceNames" -> "[`col2`, `col2`]"
+      )
+    )
 
     // If column names are not specified, fill() is applied to all the 
eligible columns.
     checkAnswer(
@@ -414,10 +426,16 @@ class DataFrameNaFunctionsSuite extends QueryTest with 
SharedSparkSession {
     val df = left.join(right, Seq("col1"))
 
     // If column names are specified, the following fails due to ambiguity.
-    val exception = intercept[AnalysisException] {
-      df.na.drop("any", Seq("col2"))
-    }
-    assert(exception.getMessage.contains("Reference 'col2' is ambiguous"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.na.drop("any", Seq("col2"))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`col2`",
+        "referenceNames" -> "[`col2`, `col2`]"
+      )
+    )
 
     // If column names are not specified, drop() is applied to all the 
eligible rows.
     checkAnswer(
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index ceb1a75e83d..47ff942e5ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -138,18 +138,46 @@ class DataFrameStatSuite extends QueryTest with 
SharedSparkSession {
     assert(dfx.stat.freqItems(Array("table1.num", 
"table2.num")).collect()(0).length == 2)
 
     // this should throw "Reference 'num' is ambiguous"
-    intercept[AnalysisException] {
-      dfx.stat.freqItems(Array("num"))
-    }
-    intercept[AnalysisException] {
-      dfx.stat.approxQuantile("num", Array(0.1), 0.0)
-    }
-    intercept[AnalysisException] {
-      dfx.stat.cov("num", "num")
-    }
-    intercept[AnalysisException] {
-      dfx.stat.corr("num", "num")
-    }
+    checkError(
+      exception = intercept[AnalysisException] {
+        dfx.stat.freqItems(Array("num"))
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`num`",
+        "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+      )
+    )
+    checkError(
+      exception = intercept[AnalysisException] {
+        dfx.stat.approxQuantile("num", Array(0.1), 0.0)
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`num`",
+        "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+      )
+    )
+    checkError(
+      exception = intercept[AnalysisException] {
+        dfx.stat.cov("num", "num")
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`num`",
+        "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+      )
+    )
+    checkError(
+      exception = intercept[AnalysisException] {
+        dfx.stat.corr("num", "num")
+      },
+      errorClass = "AMBIGUOUS_REFERENCE",
+      parameters = Map(
+        "name" -> "`num`",
+        "referenceNames" -> "[`table1`.`num`, `table2`.`num`]"
+      )
+    )
   }
 
   test("SPARK-40933 test cov & corr with null values and empty dataset") {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
index 80f258c4659..3202ef728e5 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/PlanResolutionSuite.scala
@@ -1929,10 +1929,14 @@ class PlanResolutionSuite extends AnalysisTest {
 
     // no aliases
     Seq(("v2Table", "v2Table1"), ("testcat.tab", "testcat.tab1")).foreach { 
pair =>
+      def referenceNames(target: String, column: String): String = target 
match {
+        case "v2Table" => s"[`spark_catalog`.`default`.`v2Table1`.`$column`, " 
+
+          s"`spark_catalog`.`default`.`v2Table`.`$column`]"
+        case "testcat.tab" => s"[`testcat`.`tab1`.`$column`, 
`testcat`.`tab`.`$column`]"
+      }
 
       val target = pair._1
       val source = pair._2
-
       val sql1 =
         s"""
            |MERGE INTO $target
@@ -1986,8 +1990,8 @@ class PlanResolutionSuite extends AnalysisTest {
       // resolve column `i` as it's ambiguous.
       checkError(
         exception = intercept[AnalysisException](parseAndResolve(sql2)),
-        errorClass = null,
-        parameters = Map.empty,
+        errorClass = "AMBIGUOUS_REFERENCE",
+        parameters = Map("name" -> "`i`", "referenceNames" -> 
referenceNames(target, "i")),
         context = ExpectedContext(
           fragment = "i",
           start = 22 + target.length + source.length,
@@ -2002,8 +2006,8 @@ class PlanResolutionSuite extends AnalysisTest {
       // resolve column `s` as it's ambiguous.
       checkError(
         exception = intercept[AnalysisException](parseAndResolve(sql3)),
-        errorClass = null,
-        parameters = Map.empty,
+        errorClass = "AMBIGUOUS_REFERENCE",
+        parameters = Map("name" -> "`s`", "referenceNames" -> 
referenceNames(target, "s")),
         context = ExpectedContext(
           fragment = "s",
           start = 46 + target.length + source.length,
@@ -2018,8 +2022,8 @@ class PlanResolutionSuite extends AnalysisTest {
       // resolve column `s` as it's ambiguous.
       checkError(
         exception = intercept[AnalysisException](parseAndResolve(sql4)),
-        errorClass = null,
-        parameters = Map.empty,
+        errorClass = "AMBIGUOUS_REFERENCE",
+        parameters = Map("name" -> "`s`", "referenceNames" -> 
referenceNames(target, "s")),
         context = ExpectedContext(
           fragment = "s",
           start = 46 + target.length + source.length,
@@ -2034,8 +2038,8 @@ class PlanResolutionSuite extends AnalysisTest {
       // resolve column `s` as it's ambiguous.
       checkError(
         exception = intercept[AnalysisException](parseAndResolve(sql5)),
-        errorClass = null,
-        parameters = Map.empty,
+        errorClass = "AMBIGUOUS_REFERENCE",
+        parameters = Map("name" -> "`s`", "referenceNames" -> 
referenceNames(target, "s")),
         context = ExpectedContext(
           fragment = "s",
           start = 61 + target.length + source.length,
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
index aa0051a54af..a7f9da84c1f 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilterSuite.scala
@@ -674,11 +674,21 @@ class OrcFilterSuite extends OrcTest with 
SharedSparkSession {
 
         // Exception thrown for ambiguous case.
         withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") {
-          val e = intercept[AnalysisException] {
-            sql(s"select a from $tableName where a < 0").collect()
-          }
-          assert(e.getMessage.contains(
-            "Reference 'a' is ambiguous"))
+          checkError(
+            exception = intercept[AnalysisException] {
+              sql(s"select a from $tableName where a < 0").collect()
+            },
+            errorClass = "AMBIGUOUS_REFERENCE",
+            parameters = Map(
+              "name" -> "`a`",
+              "referenceNames" -> 
("[`spark_catalog`.`default`.`spark_32622`.`a`, " +
+                "`spark_catalog`.`default`.`spark_32622`.`a`]")),
+            context = ExpectedContext(
+              fragment = "a",
+              start = 32,
+              stop = 32
+            )
+          )
         }
       }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-41172][SQL] Migrate the ambiguous ref error to an error class

Reply via email to