Repository: spark Updated Branches: refs/heads/master f856fe483 -> a1adde540
[SPARK-24341][SQL][FOLLOWUP] remove duplicated error checking ## What changes were proposed in this pull request? There are 2 places we check for problematic `InSubquery`: the rule `ResolveSubquery` and `InSubquery.checkInputDataTypes`. We should unify them. ## How was this patch tested? existing tests Closes #22563 from cloud-fan/followup. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1adde54 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1adde54 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1adde54 Branch: refs/heads/master Commit: a1adde54086469b45950946d9143d17daab01f18 Parents: f856fe4 Author: Wenchen Fan <wenc...@databricks.com> Authored: Thu Sep 27 21:19:25 2018 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Thu Sep 27 21:19:25 2018 +0800 ---------------------------------------------------------------------- .../spark/sql/catalyst/analysis/Analyzer.scala | 16 +----- .../sql/catalyst/expressions/predicates.scala | 60 ++++++++++---------- .../sql-tests/results/datetime.sql.out | 5 +- .../results/higher-order-functions.sql.out | 1 + .../subquery/in-subquery/in-basic.sql.out | 10 ++-- .../negative-cases/subq-input-typecheck.sql.out | 20 +++---- 6 files changed, 49 insertions(+), 63 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index e3b1712..7034dfd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1436,21 +1436,7 @@ class Analyzer( val expr = resolveSubQuery(l, plans)((plan, exprs) => { ListQuery(plan, exprs, exprId, plan.output) }) - val subqueryOutput = expr.plan.output - val resolvedIn = InSubquery(values, expr.asInstanceOf[ListQuery]) - if (values.length != subqueryOutput.length) { - throw new AnalysisException( - s"""Cannot analyze ${resolvedIn.sql}. - |The number of columns in the left hand side of an IN subquery does not match the - |number of columns in the output of subquery. - |#columns in left hand side: ${values.length} - |#columns in right hand side: ${subqueryOutput.length} - |Left side columns: - |[${values.map(_.sql).mkString(", ")}] - |Right side columns: - |[${subqueryOutput.map(_.sql).mkString(", ")}]""".stripMargin) - } - resolvedIn + InSubquery(values, expr.asInstanceOf[ListQuery]) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 149bd79..2125340 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -144,7 +144,7 @@ case class Not(child: Expression) case class InSubquery(values: Seq[Expression], query: ListQuery) extends Predicate with Unevaluable { - @transient lazy val value: Expression = if (values.length > 1) { + @transient private lazy val value: Expression = if (values.length > 1) { CreateNamedStruct(values.zipWithIndex.flatMap { case (v: NamedExpression, _) => Seq(Literal(v.name), v) case (v, idx) => Seq(Literal(s"_$idx"), v) @@ -155,37 +155,35 @@ case class InSubquery(values: Seq[Expression], query: ListQuery) override def checkInputDataTypes(): TypeCheckResult = { - val mismatchOpt = !DataType.equalsStructurally(query.dataType, value.dataType, - ignoreNullability = true) - if (mismatchOpt) { - if (values.length != query.childOutputs.length) { - TypeCheckResult.TypeCheckFailure( - s""" - |The number of columns in the left hand side of an IN subquery does not match the - |number of columns in the output of subquery. - |#columns in left hand side: ${values.length}. - |#columns in right hand side: ${query.childOutputs.length}. - |Left side columns: - |[${values.map(_.sql).mkString(", ")}]. - |Right side columns: - |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin) - } else { - val mismatchedColumns = values.zip(query.childOutputs).flatMap { - case (l, r) if l.dataType != r.dataType => - Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})") - case _ => None - } - TypeCheckResult.TypeCheckFailure( - s""" - |The data type of one or more elements in the left hand side of an IN subquery - |is not compatible with the data type of the output of the subquery - |Mismatched columns: - |[${mismatchedColumns.mkString(", ")}] - |Left side: - |[${values.map(_.dataType.catalogString).mkString(", ")}]. - |Right side: - |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin) + if (values.length != query.childOutputs.length) { + TypeCheckResult.TypeCheckFailure( + s""" + |The number of columns in the left hand side of an IN subquery does not match the + |number of columns in the output of subquery. + |#columns in left hand side: ${values.length}. + |#columns in right hand side: ${query.childOutputs.length}. + |Left side columns: + |[${values.map(_.sql).mkString(", ")}]. + |Right side columns: + |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin) + } else if (!DataType.equalsStructurally( + query.dataType, value.dataType, ignoreNullability = true)) { + + val mismatchedColumns = values.zip(query.childOutputs).flatMap { + case (l, r) if l.dataType != r.dataType => + Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})") + case _ => None } + TypeCheckResult.TypeCheckFailure( + s""" + |The data type of one or more elements in the left hand side of an IN subquery + |is not compatible with the data type of the output of the subquery + |Mismatched columns: + |[${mismatchedColumns.mkString(", ")}] + |Left side: + |[${values.map(_.dataType.catalogString).mkString(", ")}]. + |Right side: + |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin) } else { TypeUtils.checkForOrderingExpr(value.dataType, s"function $prettyName") } http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/datetime.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 4e1cfa6..63aa004 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -82,9 +82,10 @@ struct<a:int,b:int> 1 2 2 3 + -- !query 9 select weekday('2007-02-03'), weekday('2009-07-30'), weekday('2017-05-27'), weekday(null), weekday('1582-10-15 13:10:15') --- !query 3 schema +-- !query 9 schema struct<weekday(CAST(2007-02-03 AS DATE)):int,weekday(CAST(2009-07-30 AS DATE)):int,weekday(CAST(2017-05-27 AS DATE)):int,weekday(CAST(NULL AS DATE)):int,weekday(CAST(1582-10-15 13:10:15 AS DATE)):int> --- !query 3 output +-- !query 9 output 5 3 5 NULL 4 http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out index 32d20d1..1b7c6f4 100644 --- a/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/higher-order-functions.sql.out @@ -201,6 +201,7 @@ struct<> -- !query 20 output + -- !query 21 select transform_keys(ys, (k, v) -> k) as v from nested -- !query 21 schema http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out index 088db55..686fe49 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/in-subquery/in-basic.sql.out @@ -41,15 +41,15 @@ select 1 from tab_a where (a1, b1) not in (select (a2, b2) from tab_b) struct<> -- !query 4 output org.apache.spark.sql.AnalysisException -Cannot analyze (named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery())). +cannot resolve '(named_struct('a1', tab_a.`a1`, 'b1', tab_a.`b1`) IN (listquery()))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. -#columns in left hand side: 2 -#columns in right hand side: 1 +#columns in left hand side: 2. +#columns in right hand side: 1. Left side columns: -[tab_a.`a1`, tab_a.`b1`] +[tab_a.`a1`, tab_a.`b1`]. Right side columns: -[`named_struct(a2, a2, b2, b2)`]; +[`named_struct(a2, a2, b2, b2)`].; -- !query 5 http://git-wip-us.apache.org/repos/asf/spark/blob/a1adde54/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out ---------------------------------------------------------------------- diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out index c52e570..dcd3005 100644 --- a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/subq-input-typecheck.sql.out @@ -92,15 +92,15 @@ t1a IN (SELECT t2a, t2b struct<> -- !query 7 output org.apache.spark.sql.AnalysisException -Cannot analyze (t1.`t1a` IN (listquery(t1.`t1a`))). +cannot resolve '(t1.`t1a` IN (listquery(t1.`t1a`)))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. -#columns in left hand side: 1 -#columns in right hand side: 2 +#columns in left hand side: 1. +#columns in right hand side: 2. Left side columns: -[t1.`t1a`] +[t1.`t1a`]. Right side columns: -[t2.`t2a`, t2.`t2b`]; +[t2.`t2a`, t2.`t2b`].; -- !query 8 @@ -113,15 +113,15 @@ WHERE struct<> -- !query 8 output org.apache.spark.sql.AnalysisException -Cannot analyze (named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`))). +cannot resolve '(named_struct('t1a', t1.`t1a`, 't1b', t1.`t1b`) IN (listquery(t1.`t1a`)))' due to data type mismatch: The number of columns in the left hand side of an IN subquery does not match the number of columns in the output of subquery. -#columns in left hand side: 2 -#columns in right hand side: 1 +#columns in left hand side: 2. +#columns in right hand side: 1. Left side columns: -[t1.`t1a`, t1.`t1b`] +[t1.`t1a`, t1.`t1b`]. Right side columns: -[t2.`t2a`]; +[t2.`t2a`].; -- !query 9 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org