Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19480#discussion_r147234340
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 ---
    @@ -801,10 +831,84 @@ class CodegenContext {
                |  ${makeSplitFunction(body)}
                |}
              """.stripMargin
    -        addNewFunction(name, code)
    +        addNewFunctionInternal(name, code, inlineToOuterClass = false)
           }
     
    -      foldFunctions(functions.map(name => 
s"$name(${arguments.map(_._2).mkString(", ")})"))
    +      // Here we store all the methods which have been added to the outer 
class.
    +      val outerClassFunctions = functions
    +        .filter(_.innerClassName.isEmpty)
    +        .map(_.functionName)
    +
    +      val innerClassFunctions = generateInnerClassesMethodsCalls(
    +        functions.filter(_.innerClassName.nonEmpty),
    +        func,
    +        arguments,
    +        returnType,
    +        makeSplitFunction,
    +        foldFunctions)
    +
    +      val argsString = arguments.map(_._2).mkString(", ")
    +      foldFunctions((outerClassFunctions ++ innerClassFunctions).map(
    +        name => s"$name($argsString)"))
    +    }
    +  }
    +
    +  /**
    +   * Here we handle all the methods which have been added to the inner 
classes and
    +   * not to the outer class.
    +   * Since they can be many, their direct invocation in the outer class 
adds many entries
    +   * to the outer class' constant pool. This can cause the constant pool 
to past JVM limit.
    +   * Moreover, this can cause also the outer class method where all the 
invocations are
    +   * performed to grow beyond the 64k limit.
    +   * To avoid these problems, we group them and we call only the grouping 
methods in the
    +   * outer class.
    +   *
    +   * @param functions a [[Seq]] of [[NewFunctionSpec]] defined in the 
inner classes
    +   * @param funcName the split function name base.
    +   * @param arguments the list of (type, name) of the arguments of the 
split function.
    +   * @param returnType the return type of the split function.
    +   * @param makeSplitFunction makes split function body, e.g. add 
preparation or cleanup.
    +   * @param foldFunctions folds the split function calls.
    +   * @return an [[Iterable]] containing the methods' invocations
    +   */
    +  private def generateInnerClassesMethodsCalls(
    +      functions: Seq[NewFunctionSpec],
    +      funcName: String,
    +      arguments: Seq[(String, String)],
    +      returnType: String,
    +      makeSplitFunction: String => String,
    +      foldFunctions: Seq[String] => String): Iterable[String] = {
    +    val innerClassToFunctions = mutable.ListMap.empty[(String, String), 
Seq[String]]
    +    functions.foreach(f => {
    +      val key = (f.innerClassName.get, f.innerClassInstance.get)
    +      innerClassToFunctions.update(key, f.functionName +:
    +        innerClassToFunctions.getOrElse(key, Seq.empty[String]))
    +    })
    +    // for performance reasons, the functions are prepended, instead of 
appended,
    +    // thus they are in reversed order
    +    innerClassToFunctions.transform { case (_, functions) => 
functions.reverse }
    +
    +    val argDefinitionString = arguments.map { case (t, name) => s"$t 
$name" }.mkString(", ")
    +    val argInvocationString = arguments.map(_._2).mkString(", ")
    +
    +    innerClassToFunctions.flatMap {
    +      case ((innerClassName, innerClassInstance), innerClassFunctions) =>
    +        if (innerClassFunctions.size > 
CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) {
    +          // Adding a new function to each inner class which contains
    +          // the invocation of all the ones which have been added to
    +          // that inner class
    +          val body = foldFunctions(innerClassFunctions.map(name =>
    +            s"$name($argInvocationString)"))
    +          val code = s"""
    +              |private $returnType $funcName($argDefinitionString) {
    +              |  ${makeSplitFunction(body)}
    +              |}
    +            """.stripMargin
    +          addNewFunctionToClass(funcName, code, innerClassName)
    +          Seq(s"$innerClassInstance.$funcName")
    --- End diff --
    
    The same here.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to