Repository: spark Updated Branches: refs/heads/master 579fbcf3b -> 64d8f37c7
[SPARK-16726][SQL] Improve `Union/Intersect/Except` error messages on incompatible types ## What changes were proposed in this pull request? Currently, `UNION` queries on incompatible types show misleading error messages, i.e., `unresolved operator Union`. We had better show a more correct message. This will help users in the situation of [SPARK-16704](https://issues.apache.org/jira/browse/SPARK-16704). **Before** ```scala scala> sql("select 1,2,3 union (select 1,array(2),3)") org.apache.spark.sql.AnalysisException: unresolved operator 'Union; scala> sql("select 1,2,3 intersect (select 1,array(2),3)") org.apache.spark.sql.AnalysisException: unresolved operator 'Intersect; scala> sql("select 1,2,3 except (select 1,array(2),3)") org.apache.spark.sql.AnalysisException: unresolved operator 'Except; ``` **After** ```scala scala> sql("select 1,2,3 union (select 1,array(2),3)") org.apache.spark.sql.AnalysisException: Union can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the second column of the second table; scala> sql("select 1,2,3 intersect (select 1,array(2),3)") org.apache.spark.sql.AnalysisException: Intersect can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the second column of the second table; scala> sql("select 1,2,3 except (select array(1),array(2),3)") org.apache.spark.sql.AnalysisException: Except can only be performed on tables with the compatible column types. ArrayType(IntegerType,false) <> IntegerType at the first column of the second table; ``` ## How was this patch tested? Pass the Jenkins test with a new test case. Author: Dongjoon Hyun <dongj...@apache.org> Closes #14355 from dongjoon-hyun/SPARK-16726. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64d8f37c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64d8f37c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64d8f37c Branch: refs/heads/master Commit: 64d8f37c717cbc9c1c3649cae4c7cc4e628cd72d Parents: 579fbcf Author: Dongjoon Hyun <dongj...@apache.org> Authored: Mon Aug 1 11:12:58 2016 +0200 Committer: Herman van Hovell <hvanhov...@databricks.com> Committed: Mon Aug 1 11:12:58 2016 +0200 ---------------------------------------------------------------------- .../sql/catalyst/analysis/CheckAnalysis.scala | 44 ++++++++++++++------ .../catalyst/analysis/AnalysisErrorSuite.scala | 15 +++++++ 2 files changed, 46 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/64d8f37c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 8b87a4e..41b7e62 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -253,19 +253,6 @@ trait CheckAnalysis extends PredicateHelper { } } - case s @ SetOperation(left, right) if left.output.length != right.output.length => - failAnalysis( - s"${s.nodeName} can only be performed on tables with the same number of columns, " + - s"but the left table has ${left.output.length} columns and the right has " + - s"${right.output.length}") - - case s: Union if s.children.exists(_.output.length != s.children.head.output.length) => - val firstError = s.children.find(_.output.length != s.children.head.output.length).get - failAnalysis( - s"Unions can only be performed on tables with the same number of columns, " + - s"but one table has '${firstError.output.length}' columns and another table has " + - s"'${s.children.head.output.length}' columns") - case GlobalLimit(limitExpr, _) => checkLimitClause(limitExpr) case LocalLimit(limitExpr, _) => checkLimitClause(limitExpr) @@ -280,6 +267,37 @@ trait CheckAnalysis extends PredicateHelper { case p if p.expressions.exists(PredicateSubquery.hasPredicateSubquery) => failAnalysis(s"Predicate sub-queries can only be used in a Filter: $p") + case _: Union | _: SetOperation if operator.children.length > 1 => + def dataTypes(plan: LogicalPlan): Seq[DataType] = plan.output.map(_.dataType) + def ordinalNumber(i: Int): String = i match { + case 0 => "first" + case 1 => "second" + case i => s"${i}th" + } + val ref = dataTypes(operator.children.head) + operator.children.tail.zipWithIndex.foreach { case (child, ti) => + // Check the number of columns + if (child.output.length != ref.length) { + failAnalysis( + s""" + |${operator.nodeName} can only be performed on tables with the same number + |of columns, but the first table has ${ref.length} columns and + |the ${ordinalNumber(ti + 1)} table has ${child.output.length} columns + """.stripMargin.replace("\n", " ").trim()) + } + // Check if the data types match. + dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, dt2), ci) => + if (dt1 != dt2) { + failAnalysis( + s""" + |${operator.nodeName} can only be performed on tables with the compatible + |column types. $dt1 <> $dt2 at the ${ordinalNumber(ci)} column of + |the ${ordinalNumber(ti + 1)} table + """.stripMargin.replace("\n", " ").trim()) + } + } + } + case _ => // Fallbacks to the following checks } http://git-wip-us.apache.org/repos/asf/spark/blob/64d8f37c/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index ff112c5..8363a1b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -278,6 +278,21 @@ class AnalysisErrorSuite extends AnalysisTest { testRelation.output.length.toString :: Nil) errorTest( + "union with incompatible column types", + testRelation.union(nestedRelation), + "union" :: "the compatible column types" :: Nil) + + errorTest( + "intersect with incompatible column types", + testRelation.intersect(nestedRelation), + "intersect" :: "the compatible column types" :: Nil) + + errorTest( + "except with incompatible column types", + testRelation.except(nestedRelation), + "except" :: "the compatible column types" :: Nil) + + errorTest( "SPARK-9955: correct error message for aggregate", // When parse SQL string, we will wrap aggregate expressions with UnresolvedAlias. testRelation2.where('bad_column > 1).groupBy('a)(UnresolvedAlias(max('b))), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org