maropu commented on a change in pull request #20965: [SPARK-21870][SQL] Split aggregation code into small functions URL: https://github.com/apache/spark/pull/20965#discussion_r320542595
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala ########## @@ -255,41 +261,153 @@ case class HashAggregateExec( """.stripMargin } + private def isValidParamLength(paramLength: Int): Boolean = { + // This config is only for testing + sqlContext.getConf("spark.sql.HashAggregateExec.isValidParamLength", null) match { + case null | "" => CodeGenerator.isValidParamLength(paramLength) + case validLength => paramLength <= validLength.toInt + } + } + + // Splits aggregate code into small functions because the most of JVM implementations + // can not compile too long functions. + // + // Note: The difference from `CodeGenerator.splitExpressions` is that we define an individual + // function for each aggregation function (e.g., SUM and AVG). For example, in a query + // `SELECT SUM(a), AVG(a) FROM VALUES(1) t(a)`, we define two functions + // for `SUM(a)` and `AVG(a)`. + private def splitAggregateExpressions( + ctx: CodegenContext, + aggNames: Seq[String], + aggBufferUpdatingExprs: Seq[Seq[Expression]], + aggCodeBlocks: Seq[Block], + subExprs: Map[Expression, SubExprEliminationState]): Option[String] = { + val exprValsInSubExprs = subExprs.flatMap { case (_, s) => s.value :: s.isNull :: Nil } + if (exprValsInSubExprs.exists(_.isInstanceOf[SimpleExprValue])) { + // `SimpleExprValue`s cannot be used as an input variable for split functions, so Review comment: The current code in `splitAggregateExpressions` assumes that `ExprValue` in `SubExprEliminationState` has a variable (e.g., `VariableValue` has a variable for `variableName`) for common subexpressions. So, we can simply use it as a parameter for a split function like this; ``` /* 114 */ // do aggregate /* 115 */ // common sub-expressions /* 116 */ `javaType` `variableName` = (common expression); /* 117 */ // evaluate aggregate functions and update aggregation buffers /* 118 */ agg_doAggregate_sum_0(`variableName`); /* 120 */ /* 121 */ } /* 122 */ // A split aggregate code /* 123 */ private void agg_doAggregate_sum_1(`javaType` `variableName`) throws java.io.IOException { /* 124 */ // do aggregate for sum ``` But, `SimpleExprValue` has not a variable but an expression (`case class SimpleExprValue(expr: String, javaType: Class[_])`). So, we cannot use it as a parameter. For example, in the `IsNotNull` case, `SimpleExprValue` can have an expression `(!agg_exprIsNull_1_0)` and this `ExprValue` cannot be used as a parameter like this; ``` /* 117 */ // evaluate aggregate functions and update aggregation buffers /* 118 */ agg_doAggregate_sum_0(`(!agg_exprIsNull_1_0)`); /* 120 */ /* 121 */ } /* 122 */ // A split aggregate code (A complication error occurs because of an illegal variable name `!`) /* 123 */ private void agg_doAggregate_sum_1(`javaType` `(!agg_exprIsNull_1_0)`) throws java.io.IOException { /* 124 */ // do aggregate for sum ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org