beliefer commented on code in PR #44145: URL: https://github.com/apache/spark/pull/44145#discussion_r1418231837
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala: ########## @@ -68,10 +71,56 @@ object InferWindowGroupLimit extends Rule[LogicalPlan] with PredicateHelper { case _ => false } + /** + * All window expressions should not have SizeBasedWindowFunction, all lower/upper of + * specifiedWindowFrame is UnboundedPreceding/CurrentRow, and window orderSpec is not foldable, + * so that we can safely do the early stop. + */ + private def limitSupport(limit: Int, window: Window): Boolean = + limit <= conf.windowGroupLimitThreshold && window.child.maxRows.forall(_ > limit) && + !window.child.isInstanceOf[WindowGroupLimit] && + window.orderSpec.exists(!_.foldable) && + !LimitPushDownThroughWindow.supportsPushdownThroughWindow(window.windowExpressions) && + window.windowExpressions.forall { + case Alias(WindowExpression(windowFunction, WindowSpecDefinition(_, _, + SpecifiedWindowFrame(_, UnboundedPreceding, CurrentRow))), _) + if !windowFunction.isInstanceOf[SizeBasedWindowFunction] => true + case _ => false + } + + private def isRowFrame(windowExpression: NamedExpression): Boolean = windowExpression match { + case Alias(WindowExpression(_, WindowSpecDefinition(_, _, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => true + case _ => false + } + + private def rankLikeFunction(windowExpressions: Seq[NamedExpression]): Expression = + // If windowExpressions all are RowFrame, choose SimpleLimitIterator, + // else RankLimitIterator to obtain enough rows for ensure data accuracy. + if (windowExpressions.forall(isRowFrame)) { + new RowNumber + } else { + new Rank Review Comment: Consider an example with two window expressions: an aggregate function `sum(a)` with a row frame and another `sum(a)` with a range frame. In this case we select `Rank`. Is the output of the `sum(a)` with the row frame still correct? -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org