beliefer commented on code in PR #44145:
URL: https://github.com/apache/spark/pull/44145#discussion_r1418231837


##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/InferWindowGroupLimit.scala:
##########
@@ -68,10 +71,56 @@ object InferWindowGroupLimit extends Rule[LogicalPlan] with 
PredicateHelper {
     case _ => false
   }
 
+  /**
+   * All window expressions should not have SizeBasedWindowFunction, all 
lower/upper of
+   * specifiedWindowFrame is UnboundedPreceding/CurrentRow, and window 
orderSpec is not foldable,
+   * so that we can safely do the early stop.
+   */
+  private def limitSupport(limit: Int, window: Window): Boolean =
+    limit <= conf.windowGroupLimitThreshold && window.child.maxRows.forall(_ > 
limit) &&
+      !window.child.isInstanceOf[WindowGroupLimit] &&
+      window.orderSpec.exists(!_.foldable) &&
+      
!LimitPushDownThroughWindow.supportsPushdownThroughWindow(window.windowExpressions)
 &&
+      window.windowExpressions.forall {
+        case Alias(WindowExpression(windowFunction, WindowSpecDefinition(_, _,
+        SpecifiedWindowFrame(_, UnboundedPreceding, CurrentRow))), _)
+          if !windowFunction.isInstanceOf[SizeBasedWindowFunction] => true
+        case _ => false
+      }
+
+  private def isRowFrame(windowExpression: NamedExpression): Boolean = 
windowExpression match {
+    case Alias(WindowExpression(_, WindowSpecDefinition(_, _,
+    SpecifiedWindowFrame(RowFrame, UnboundedPreceding, CurrentRow))), _) => 
true
+    case _ => false
+  }
+
+  private def rankLikeFunction(windowExpressions: Seq[NamedExpression]): 
Expression =
+    // If windowExpressions all are RowFrame, choose SimpleLimitIterator,
+    // else RankLimitIterator to obtain enough rows for ensure data accuracy.
+    if (windowExpressions.forall(isRowFrame)) {
+      new RowNumber
+    } else {
+      new Rank

Review Comment:
   Take an example: one aggregate function `sum(a)` with a row frame and 
another `sum(a)` with a range frame.
   In this case we select `Rank`.
   Is the output of the `sum(a)` with the row frame still correct?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to