Github user maropu commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21853#discussion_r204609653

    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---
    @@ -164,10 +164,20 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
        * Optimize all the subqueries inside expression.
        */
       object OptimizeSubqueries extends Rule[LogicalPlan] {
    +    private def removeTopLevelSorts(plan: LogicalPlan): LogicalPlan = {
    +      plan match {
    +        case Sort(_, _, child) => child
    +        case Project(fields, child) => Project(fields, removeTopLevelSorts(child))
    +        case other => other
    +      }
    +    }
         def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
           case s: SubqueryExpression =>
             val Subquery(newPlan) = Optimizer.this.execute(Subquery(s.plan))
    -        s.withNewPlan(newPlan)
    +        // At this point we have an optimized subquery plan that we are going to attach
    +        // to this subquery expression. Here we can safely remove any top level sorts
    --- End diff --

    super nit: `any top level sort`?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org