Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22563#discussion_r220826272

--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala ---
@@ -155,37 +155,35 @@ case class InSubquery(values: Seq[Expression], query: ListQuery)

   override def checkInputDataTypes(): TypeCheckResult = {
-    val mismatchOpt = !DataType.equalsStructurally(query.dataType, value.dataType,
-      ignoreNullability = true)
-    if (mismatchOpt) {
-      if (values.length != query.childOutputs.length) {
-        TypeCheckResult.TypeCheckFailure(
-          s"""
-             |The number of columns in the left hand side of an IN subquery does not match the
-             |number of columns in the output of subquery.
-             |#columns in left hand side: ${values.length}.
-             |#columns in right hand side: ${query.childOutputs.length}.
-             |Left side columns:
-             |[${values.map(_.sql).mkString(", ")}].
-             |Right side columns:
-             |[${query.childOutputs.map(_.sql).mkString(", ")}].""".stripMargin)
-      } else {
-        val mismatchedColumns = values.zip(query.childOutputs).flatMap {
-          case (l, r) if l.dataType != r.dataType =>
-            Seq(s"(${l.sql}:${l.dataType.catalogString}, ${r.sql}:${r.dataType.catalogString})")
-          case _ => None
-        }
-        TypeCheckResult.TypeCheckFailure(
-          s"""
-             |The data type of one or more elements in the left hand side of an IN subquery
-             |is not compatible with the data type of the output of the subquery
-             |Mismatched columns:
-             |[${mismatchedColumns.mkString(", ")}]
-             |Left side:
-             |[${values.map(_.dataType.catalogString).mkString(", ")}].
-             |Right side:
-             |[${query.childOutputs.map(_.dataType.catalogString).mkString(", ")}].""".stripMargin)
+    if (values.length != query.childOutputs.length) {
--- End diff --

The reason why I added the check in `ResolveSubquery` was to fail fast, without wasting time running the rest of the analysis before we get to the data type checks. With that check in place, the one in `checkInputDataTypes` was effectively redundant, but I left it there for safety. I agree with unifying them and I am actually fine with this change, but I'd prefer to keep the check in `ResolveSubquery` so that we fail immediately (it is not a big deal anyway). What do you think?
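To make the trade-off concrete, here is a minimal, self-contained sketch of the "fail fast" idea. It uses simplified stand-in types rather than the real Catalyst classes, so names like `InValues`, `Subquery`, `resolveInSubquery` and this `AnalysisException` are illustrative only, not the actual Spark APIs: an arity check performed at resolution time (in the spirit of `ResolveSubquery`) rejects the query before the later type check (in the spirit of `checkInputDataTypes`) is ever reached.

// Stand-in types for illustration only; not the real Catalyst classes.
object FailFastSketch {

  case class Column(name: String, dataType: String)

  // Left-hand side of the IN clause (hypothetical simplification of values: Seq[Expression]).
  case class InValues(columns: Seq[Column])

  // Output of the subquery (hypothetical simplification of ListQuery.childOutputs).
  case class Subquery(childOutputs: Seq[Column])

  final class AnalysisException(msg: String) extends RuntimeException(msg)

  // Early check, analogous in spirit to validating arity inside ResolveSubquery:
  // it fires as soon as the subquery is resolved, before any later analysis runs.
  def resolveInSubquery(values: InValues, query: Subquery): Unit = {
    if (values.columns.length != query.childOutputs.length) {
      throw new AnalysisException(
        s"IN subquery arity mismatch: left hand side has ${values.columns.length} " +
          s"column(s), subquery output has ${query.childOutputs.length} column(s).")
    }
  }

  // Later check, analogous in spirit to InSubquery.checkInputDataTypes:
  // only reached after the rest of the analysis has already been done.
  def checkInputDataTypes(values: InValues, query: Subquery): Either[String, Unit] = {
    val mismatched = values.columns.zip(query.childOutputs).collect {
      case (l, r) if l.dataType != r.dataType =>
        s"(${l.name}:${l.dataType}, ${r.name}:${r.dataType})"
    }
    if (mismatched.nonEmpty) Left(s"Mismatched columns: [${mismatched.mkString(", ")}]")
    else Right(())
  }

  def main(args: Array[String]): Unit = {
    val lhs = InValues(Seq(Column("a", "int"), Column("b", "string")))
    val sub = Subquery(Seq(Column("x", "int")))
    // The arity error is raised here, before checkInputDataTypes would ever run.
    resolveInSubquery(lhs, sub)
  }
}

Failing at resolution time means the arity error surfaces before any further analysis work is spent on the plan, which is the "fail immediately" behaviour described above.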