Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/18968#discussion_r133767375 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala --- @@ -140,25 +140,55 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate { require(list != null, "list should not be null") override def checkInputDataTypes(): TypeCheckResult = { list match { - case ListQuery(sub, _, _) :: Nil => + case (l @ ListQuery(sub, children, _)) :: Nil => val valExprs = value match { case cns: CreateNamedStruct => cns.valExprs case expr => Seq(expr) } - if (valExprs.length != sub.output.length) { - TypeCheckResult.TypeCheckFailure( + + // SPARK-21759: + // It is possible that the subquery plan has more output than value expressions, because + // the condition expressions in `ListQuery` might use part of the subquery plan's output. + // For example, in the following query plan, the condition of `ListQuery` uses value#207 + // from the subquery. So the size of the subquery's output is 2, while the size of the value + // is 1. + // + // Filter key#201 IN (list#200 [(value#207 = min(value)#204)]) + // : +- Project [key#206, value#207] + // : +- Filter (value#207 > val_9) --- End diff -- why did you pick a different example in the PR description?
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org