[GitHub] spark pull request #19872: WIP: [SPARK-22274][PySpark] User-defined aggregat...

icexelloss Wed, 27 Dec 2017 13:58:24 -0800

Github user icexelloss commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19872#discussion_r158872825
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/python/ExtractPythonUDFs.scala ---
    @@ -48,29 +48,46 @@ object ExtractPythonUDFFromAggregate extends Rule[LogicalPlan] {
         }.isDefined
       }
     
    +  private def isPandasGroupAggUdf(expr: Expression): Boolean = expr match {
    +      case PythonUDF(_, _, _, _, PythonEvalType.SQL_PANDAS_GROUP_AGG_UDF) => true
    +      case Alias(child, _) => isPandasGroupAggUdf(child)
    +      case _ => false
    +  }
    +
    +  private def hasPandasGroupAggUdf(agg: Aggregate): Boolean = {
    +    val actualAggExpr = agg.aggregateExpressions.drop(agg.groupingExpressions.length)
    +    actualAggExpr.exists(isPandasGroupAggUdf)
    +  }
    +
    +
       private def extract(agg: Aggregate): LogicalPlan = {
         val projList = new ArrayBuffer[NamedExpression]()
         val aggExpr = new ArrayBuffer[NamedExpression]()
    -    agg.aggregateExpressions.foreach { expr =>
    -      if (hasPythonUdfOverAggregate(expr, agg)) {
    -        // Python UDF can only be evaluated after aggregate
    -        val newE = expr transformDown {
    -          case e: Expression if belongAggregate(e, agg) =>
    -            val alias = e match {
    -              case a: NamedExpression => a
    -              case o => Alias(e, "agg")()
    -            }
    -            aggExpr += alias
    -            alias.toAttribute
    +
    +    if (hasPandasGroupAggUdf(agg)) {
    +      Aggregate(agg.groupingExpressions, agg.aggregateExpressions, agg.child)
    --- End diff --
    
    I am not sure, but I added copy in `ExtractGroupAggPandasUDFFromAggregate`, similar to existing rules.



---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] spark pull request #19872: WIP: [SPARK-22274][PySpark] User-defined aggregat...

Reply via email to