zml1206 commented on code in PR #44145: URL: https://github.com/apache/spark/pull/44145#discussion_r1418213459
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala: ########## @@ -68,10 +71,56 @@ object InferWindowGroupLimit extends Rule[LogicalPlan] with PredicateHelper { case _ => false } + /** + * All window expressions should not have SizeBasedWindowFunction, all lower/upper of + * specifiedWindowFrame is UnboundedPreceding/CurrentRow, and window orderSpec is not foldable, + * so that we can safely do the early stop. + */ + private def limitSupport(limit: Int, window: Window): Boolean = + limit <= conf.windowGroupLimitThreshold && window.child.maxRows.forall(_ > limit) && + !window.child.isInstanceOf[WindowGroupLimit] && + window.orderSpec.exists(!_.foldable) && + !LimitPushDownThroughWindow.supportsPushdownThroughWindow(window.windowExpressions) && + window.windowExpressions.forall { + case Alias(WindowExpression(windowFunction, WindowSpecDefinition(_, _, + SpecifiedWindowFrame(_, UnboundedPreceding, CurrentRow))), _) + if !windowFunction.isInstanceOf[SizeBasedWindowFunction] => true + case _ => false + } + + private def isRowFrame(windowExpression: NamedExpression): Boolean = windowExpression match { + case Alias(WindowExpression(_, WindowSpecDefinition(_, _, + SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => true + case _ => false + } + + private def rankLikeFunction(windowExpressions: Seq[NamedExpression]): Expression = + // If windowExpressions all are RowFrame, choose SimpleLimitIterator, + // else RankLimitIterator to obtain enough rows for ensure data accuracy. + if (windowExpressions.forall(isRowFrame)) { + new RowNumber + } else { + new Rank Review Comment: Choosing `RowNumber` requires all of the frames to be `RowFrame`. If any frame is not a `RowFrame` (that is, a `RangeFrame` is present, even alongside `RowFrame`s), we fall back to the `RangeFrame` handling (`Rank`), because `RangeFrame` requires more data. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org