Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19480#discussion_r147446840
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 ---
    @@ -801,10 +831,82 @@ class CodegenContext {
                |  ${makeSplitFunction(body)}
                |}
              """.stripMargin
    -        addNewFunction(name, code)
    +        addNewFunctionInternal(name, code, inlineToOuterClass = false)
           }
     
    -      foldFunctions(functions.map(name => 
s"$name(${arguments.map(_._2).mkString(", ")})"))
    +      val (outerClassFunctions, innerClassFunctions) = 
functions.partition(_.innerClassName.isEmpty)
    +
    +      val argsString = arguments.map(_._2).mkString(", ")
    +      val outerClassFunctionCalls = outerClassFunctions.map(f => 
s"${f.functionName}($argsString)")
    +
    +      val innerClassFunctionCalls = generateInnerClassesFunctionCalls(
    +        innerClassFunctions,
    +        func,
    +        arguments,
    +        returnType,
    +        makeSplitFunction,
    +        foldFunctions)
    +
    +      foldFunctions(outerClassFunctionCalls ++ innerClassFunctionCalls)
    +    }
    +  }
    +
    +  /**
    +   * Here we handle all the methods which have been added to the inner 
classes and
    +   * not to the outer class.
    +   * Since they can be many, their direct invocation in the outer class 
adds many entries
    +   * to the outer class' constant pool. This can cause the constant pool 
to exceed the JVM limit.
    +   * Moreover, this can also cause the outer class method where all the 
invocations are
    +   * performed to grow beyond the 64k limit.
    +   * To avoid these problems, we group them and we call only the grouping 
methods in the
    +   * outer class.
    +   *
    +   * @param functions a [[Seq]] of [[NewFunctionSpec]] defined in the 
inner classes
    +   * @param funcName the split function name base.
    +   * @param arguments the list of (type, name) of the arguments of the 
split function.
    +   * @param returnType the return type of the split function.
    +   * @param makeSplitFunction makes split function body, e.g. add 
preparation or cleanup.
    +   * @param foldFunctions folds the split function calls.
    +   * @return an [[Iterable]] containing the methods' invocations
    +   */
    +  private def generateInnerClassesFunctionCalls(
    +      functions: Seq[NewFunctionSpec],
    +      funcName: String,
    +      arguments: Seq[(String, String)],
    +      returnType: String,
    +      makeSplitFunction: String => String,
    +      foldFunctions: Seq[String] => String): Iterable[String] = {
    +    val innerClassToFunctions = mutable.LinkedHashMap.empty[(String, 
String), Seq[String]]
    +    functions.foreach(f => {
    +      val key = (f.innerClassName.get, f.innerClassInstance.get)
    +      innerClassToFunctions.update(key, f.functionName +:
    +        innerClassToFunctions.getOrElse(key, Seq.empty[String]))
    +    })
    +
    +    val argDefinitionString = arguments.map { case (t, name) => s"$t 
$name" }.mkString(", ")
    +    val argInvocationString = arguments.map(_._2).mkString(", ")
    +
    +    innerClassToFunctions.flatMap {
    +      case ((innerClassName, innerClassInstance), innerClassFunctions) =>
    +        // for performance reasons, the functions are prepended, instead 
of appended,
    +        // thus here they are in reversed order
    +        val orderedFunctions = innerClassFunctions.reverse
    +        if (orderedFunctions.size > 
CodeGenerator.MERGE_SPLIT_METHODS_THRESHOLD) {
    +          // Adding a new function to each inner class which contains
    +          // the invocation of all the ones which have been added to
    +          // that inner class
    +          val body = foldFunctions(orderedFunctions.map(name =>
    +            s"$name($argInvocationString)"))
    +          val code = s"""
    +              |private $returnType $funcName($argDefinitionString) {
    +              |  ${makeSplitFunction(body)}
    +              |}
    +            """.stripMargin
    --- End diff --
    
    ```Scala
              // Adding a new function to each inner class which contains the 
invocation of all the
              // ones which have been added to that inner class. For example,
              //   private class NestedClass {
              //     private void apply_862(InternalRow i) { ... }
              //     private void apply_863(InternalRow i) { ... }
              //       ...
              //     private void apply(InternalRow i) {
              //       apply_862(i);
              //       apply_863(i);
              //       ...
              //     }
              //   }
              val body = foldFunctions(orderedFunctions.map(name => 
s"$name($argInvocationString)"))
              val code =
                s"""
                  |private $returnType $funcName($argDefinitionString) {
                  |  ${makeSplitFunction(body)}
                  |}
                 """.stripMargin
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to