Github user maropu commented on a diff in the pull request: https://github.com/apache/spark/pull/19767#discussion_r151830994 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala --- @@ -105,6 +105,41 @@ abstract class Expression extends TreeNode[Expression] { val isNull = ctx.freshName("isNull") val value = ctx.freshName("value") val ve = doGenCode(ctx, ExprCode("", isNull, value)) + + // TODO: support whole stage codegen too + if (ve.code.trim.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { + val setIsNull = if (ve.isNull != "false" && ve.isNull != "true") { + val globalIsNull = ctx.freshName("globalIsNull") + ctx.addMutableState("boolean", globalIsNull, s"$globalIsNull = false;") + val localIsNull = ve.isNull + ve.isNull = globalIsNull + s"$globalIsNull = $localIsNull;" + } else { + "" + } + + val setValue = { + val globalValue = ctx.freshName("globalValue") + ctx.addMutableState( + ctx.javaType(dataType), globalValue, s"$globalValue = ${ctx.defaultValue(dataType)};") + val localValue = ve.value + ve.value = globalValue + s"$globalValue = $localValue;" + } + + val funcName = ctx.freshName(nodeName) + val funcFullName = ctx.addNewFunction(funcName, + s""" + |private void $funcName(InternalRow ${ctx.INPUT_ROW}) { + | ${ve.code.trim} + | $setValue --- End diff -- Would it be a bad idea to prepare a utility class that stores a (value, isNull) pair for these splitting cases? I feel class fields are a valuable resource.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org