Repository: spark
Updated Branches:
  refs/heads/master 579fbcf3b -> 64d8f37c7


[SPARK-16726][SQL] Improve `Union/Intersect/Except` error messages on 
incompatible types

## What changes were proposed in this pull request?

Currently, `UNION` queries on incompatible types show a misleading error 
message, e.g., `unresolved operator Union`. We should show a more accurate 
message instead. This will help users in the situation of 
[SPARK-16704](https://issues.apache.org/jira/browse/SPARK-16704).

**Before**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Union;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Intersect;
scala> sql("select 1,2,3 except (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: unresolved operator 'Except;
```

**After**
```scala
scala> sql("select 1,2,3 union (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Union can only be performed on tables 
with the compatible column types. ArrayType(IntegerType,false) <> IntegerType 
at the second column of the second table;
scala> sql("select 1,2,3 intersect (select 1,array(2),3)")
org.apache.spark.sql.AnalysisException: Intersect can only be performed on 
tables with the compatible column types. ArrayType(IntegerType,false) <> 
IntegerType at the second column of the second table;
scala> sql("select 1,2,3 except (select array(1),array(2),3)")
org.apache.spark.sql.AnalysisException: Except can only be performed on tables 
with the compatible column types. ArrayType(IntegerType,false) <> IntegerType 
at the first column of the second table;
```

## How was this patch tested?

Passes the Jenkins tests, including a new test case.

Author: Dongjoon Hyun <dongj...@apache.org>

Closes #14355 from dongjoon-hyun/SPARK-16726.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64d8f37c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64d8f37c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64d8f37c

Branch: refs/heads/master
Commit: 64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d
Parents: 579fbcf
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Mon Aug 1 11:12:58 2016 +0200
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Mon Aug 1 11:12:58 2016 +0200

----------------------------------------------------------------------
 .../sql/catalyst/analysis/CheckAnalysis.scala   | 44 ++++++++++++++------
 .../catalyst/analysis/AnalysisErrorSuite.scala  | 15 +++++++
 2 files changed, 46 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/64d8f37c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 8b87a4e..41b7e62 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -253,19 +253,6 @@ trait CheckAnalysis extends PredicateHelper {
               }
             }
 
-          case s @ SetOperation(left, right) if left.output.length != 
right.output.length =>
-            failAnalysis(
-              s"${s.nodeName} can only be performed on tables with the same 
number of columns, " +
-                s"but the left table has ${left.output.length} columns and the 
right has " +
-                s"${right.output.length}")
-
-          case s: Union if s.children.exists(_.output.length != 
s.children.head.output.length) =>
-            val firstError = s.children.find(_.output.length != 
s.children.head.output.length).get
-            failAnalysis(
-              s"Unions can only be performed on tables with the same number of 
columns, " +
-                s"but one table has '${firstError.output.length}' columns and 
another table has " +
-                s"'${s.children.head.output.length}' columns")
-
           case GlobalLimit(limitExpr, _) => checkLimitClause(limitExpr)
 
           case LocalLimit(limitExpr, _) => checkLimitClause(limitExpr)
@@ -280,6 +267,37 @@ trait CheckAnalysis extends PredicateHelper {
           case p if 
p.expressions.exists(PredicateSubquery.hasPredicateSubquery) =>
             failAnalysis(s"Predicate sub-queries can only be used in a Filter: 
$p")
 
+          case _: Union | _: SetOperation if operator.children.length > 1 =>
+            def dataTypes(plan: LogicalPlan): Seq[DataType] = 
plan.output.map(_.dataType)
+            def ordinalNumber(i: Int): String = i match {
+              case 0 => "first"
+              case 1 => "second"
+              case i => s"${i}th"
+            }
+            val ref = dataTypes(operator.children.head)
+            operator.children.tail.zipWithIndex.foreach { case (child, ti) =>
+              // Check the number of columns
+              if (child.output.length != ref.length) {
+                failAnalysis(
+                  s"""
+                    |${operator.nodeName} can only be performed on tables with 
the same number
+                    |of columns, but the first table has ${ref.length} columns 
and
+                    |the ${ordinalNumber(ti + 1)} table has 
${child.output.length} columns
+                  """.stripMargin.replace("\n", " ").trim())
+              }
+              // Check if the data types match.
+              dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, 
dt2), ci) =>
+                if (dt1 != dt2) {
+                  failAnalysis(
+                    s"""
+                      |${operator.nodeName} can only be performed on tables 
with the compatible
+                      |column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} 
column of
+                      |the ${ordinalNumber(ti + 1)} table
+                    """.stripMargin.replace("\n", " ").trim())
+                }
+              }
+            }
+
           case _ => // Fallbacks to the following checks
         }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/64d8f37c/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index ff112c5..8363a1b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -278,6 +278,21 @@ class AnalysisErrorSuite extends AnalysisTest {
       testRelation.output.length.toString :: Nil)
 
   errorTest(
+    "union with incompatible column types",
+    testRelation.union(nestedRelation),
+    "union" :: "the compatible column types" :: Nil)
+
+  errorTest(
+    "intersect with incompatible column types",
+    testRelation.intersect(nestedRelation),
+    "intersect" :: "the compatible column types" :: Nil)
+
+  errorTest(
+    "except with incompatible column types",
+    testRelation.except(nestedRelation),
+    "except" :: "the compatible column types" :: Nil)
+
+  errorTest(
     "SPARK-9955: correct error message for aggregate",
     // When parse SQL string, we will wrap aggregate expressions with 
UnresolvedAlias.
     testRelation2.where('bad_column > 1).groupBy('a)(UnresolvedAlias(max('b))),


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to