Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/19821#discussion_r153123637 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala --- @@ -785,13 +785,36 @@ class CodegenContext { * @param expressions the codes to evaluate expressions. */ def splitExpressions(row: String, expressions: Seq[String]): String = { - if (row == null || currentVars != null) { + if (INPUT_ROW == null || currentVars != null) { // Cannot split these expressions because they are not created from a row object. return expressions.mkString("\n") } splitExpressions(expressions, funcName = "apply", arguments = ("InternalRow", row) :: Nil) } + /** + * Splits the generated code of expressions into multiple functions, because function has + * 64kb code size limit in JVM. This version takes care of INPUT_ROW and currentVars + * + * @param expressions the codes to evaluate expressions. + * @param funcName the split function name base. + * @param argumentsExceptRow the list of (type, name) of the arguments of the split function + * except for ctx.INPUT_ROW + */ + def splitExpressions( + expressions: Seq[String], + funcName: String, + argumentsExceptRow: Seq[(String, String)]): String = { --- End diff -- Could you check the callers of case 3, which also need to check `INPUT_ROW` and `currentVars`? It sounds like some of them are missing that check. In addition, cases `1` and `2` can be easily combined. I think we need a different name for cases `1` and `2`. How about `splitExpressionsOnInputRow`? cc @cloud-fan
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org