agubichev commented on code in PR #43111: URL: https://github.com/apache/spark/pull/43111#discussion_r1345929714
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala: ########## @@ -461,6 +462,23 @@ object DecorrelateInnerQuery extends PredicateHelper { p.mapChildren(rewriteDomainJoins(outerPlan, _, conditions)) } + private def isCountBugFree(aggregateExpressions: Seq[NamedExpression]): Boolean = { + // The COUNT bug only appears if an aggregate expression returns a non-NULL result on an empty + // input. + // Typical example (hence the name) is COUNT(*) that returns 0 from an empty result. + // However, SUM(x) IS NULL is another case that returns 0, and in general any IS/NOT IS and CASE + // expressions are suspect (and the combination of those). + // For now we conservatively accept only those expressions that are guaranteed to be safe. + val exprsRejectEmptyInput = aggregateExpressions.map { + case _ : AttributeReference => true + case Alias(_: AttributeReference, _) => true + case Alias(_: Literal, _) => true + case Alias(a: AggregateExpression, _) if a.aggregateFunction.defaultResult == None => true + case _ => false + } + exprsRejectEmptyInput.forall(x => x == true) Review Comment: neat, thank you! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org