Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/19962#discussion_r156617815 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala --- @@ -981,80 +976,52 @@ case class ScalaUDF( } // scalastyle:on line.size.limit - - private val converterClassName = classOf[Any => Any].getName - private val scalaUDFClassName = classOf[ScalaUDF].getName - private val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$" - - // Generate codes used to convert the arguments to Scala type for user-defined functions - private[this] def genCodeForConverter(ctx: CodegenContext, index: Int): (String, String) = { - val converterTerm = ctx.freshName("converter") - val expressionIdx = ctx.references.size - 1 - (converterTerm, - s"$converterClassName $converterTerm = ($converterClassName)$typeConvertersClassName" + - s".createToScalaConverter(((Expression)((($scalaUDFClassName)" + - s"references[$expressionIdx]).getChildren().apply($index))).dataType());") - } - override def doGenCode( ctx: CodegenContext, ev: ExprCode): ExprCode = { - val scalaUDF = ctx.freshName("scalaUDF") - val scalaUDFRef = ctx.addReferenceObj("scalaUDFRef", this, scalaUDFClassName) - - // Object to convert the returned value of user-defined functions to Catalyst type - val catalystConverterTerm = ctx.freshName("catalystConverter") + val converterClassName = classOf[Any => Any].getName + // The type converters for inputs and the result. 
+ val converters: Array[Any => Any] = children.map { c => + CatalystTypeConverters.createToScalaConverter(c.dataType) + }.toArray :+ CatalystTypeConverters.createToCatalystConverter(dataType) + val convertersTerm = ctx.addReferenceObj("converters", converters, s"$converterClassName[]") + val errorMsgTerm = ctx.addReferenceObj("errMsg", udfErrorMessage) val resultTerm = ctx.freshName("result") - // This must be called before children expressions' codegen - // because ctx.references is used in genCodeForConverter - val converterTerms = children.indices.map(genCodeForConverter(ctx, _)) - - // Initialize user-defined function - val funcClassName = s"scala.Function${children.size}" - - val funcTerm = ctx.freshName("udf") - // codegen for children expressions val evals = children.map(_.genCode(ctx)) // Generate the codes for expressions and calling user-defined function // We need to get the boxedType of dataType's javaType here. Because for the dataType // such as IntegerType, its javaType is `int` and the returned type of user-defined // function is Object. Trying to convert an Object to `int` will cause casting exception. - val evalCode = evals.map(_.code).mkString - val (converters, funcArguments) = converterTerms.zipWithIndex.map { - case ((convName, convInit), i) => - val eval = evals(i) - val argTerm = ctx.freshName("arg") - val convert = - s""" - |$convInit - |Object $argTerm = ${eval.isNull} ? null : $convName.apply(${eval.value}); - """.stripMargin - (convert, argTerm) - }.unzip + val evalCode = evals.map(_.code).mkString("\n") + val initFuncArgs = scala.collection.mutable.ListBuffer.empty[String] + val funcArguments = evals.zipWithIndex.map { case (eval, i) => --- End diff -- very nit: can we do something like: ``` val (funcArguments, initFuncArgs) = evals.zipWithIndex.map { case (eval, i) => val argTerm = ctx.freshName("arg") val initFuncArgs = s"Object $argTerm = ${eval.isNull} ? 
null : $convertersTerm[$i].apply(${eval.value});" (argTerm, initFuncArgs) }.unzip ``` Just to avoid side effects and be more compliant with functional programming...
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org