Hisoka-X commented on code in PR #40865: URL: https://github.com/apache/spark/pull/40865#discussion_r1177477109
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala: ########## @@ -599,10 +599,39 @@ object RewriteCorrelatedScalarSubquery extends Rule[LogicalPlan] with AliasHelpe if (Utils.isTesting) { assert(mayHaveCountBug.isDefined) } + + def queryOutputFoldable(list: Seq[NamedExpression]): Boolean = { + trimAliases(list.filter(p => p.exprId.equals(query.output.head.exprId)).head).foldable + } + + // SPARK-43156: We can judge whether the column returned by the subquery is + // foldable (already handled by [[NullPropagation]]). If it is, it means that + // the result of this value has no substantial relationship with the data, + // and the presence or absence of data will not affect this column. So in + // this case, this column can be extracted from the JOIN to ensure that this + // value can be obtained regardless of whether the data JOIN is successful or not. + lazy val resultFoldable = { + query match { + case Project(expressions, _) => + queryOutputFoldable(expressions) + case Aggregate(_, expressions, _) => + queryOutputFoldable(expressions) + case _ => + false + } + } + if (resultWithZeroTups.isEmpty) { // CASE 1: Subquery guaranteed not to have the COUNT bug because it evaluates to NULL // with zero tuples. planWithoutCountBug + } else if (mayHaveCountBug.getOrElse(false) && resultFoldable && Review Comment: > can you show me which test will fail if we don't have the changes here? Without this change, the logic will use `Literal(0)` as the result to replace `count(1)`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org