Github user ueshin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18075#discussion_r118872149
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 ---
    @@ -233,10 +222,128 @@ class CodegenContext {
       // The collection of sub-expression result resetting methods that need 
to be called on each row.
       val subexprFunctions = mutable.ArrayBuffer.empty[String]
     
    -  def declareAddedFunctions(): String = {
    -    addedFunctions.map { case (funcName, funcCode) => funcCode 
}.mkString("\n")
    +  /**
    +   * Holds the class and instance names to be generated. `OuterClass` is a 
placeholder standing for
    +   * whichever class is generated as the outermost class and which will 
contain any nested
    +   * sub-classes. All other classes and instance names in this list will 
represent private, nested
    +   * sub-classes.
    +   */
    +  private val classes: mutable.ListBuffer[(String, String)] =
    +    mutable.ListBuffer[(String, String)]("OuterClass" -> null)
    +
    +  // A map holding the current size in bytes of each class to be generated.
    +  private val classSize: mutable.Map[String, Int] =
    +    mutable.Map[String, Int]("OuterClass" -> 0)
    +
    +  // A map holding lists of functions belonging to their class.
    +  private val classFunctions: mutable.Map[String, 
mutable.ListBuffer[String]] =
    +    mutable.Map("OuterClass" -> mutable.ListBuffer.empty[String])
    +
    +  // Returns the size of the most recently added class.
    +  private def currClassSize(): Int = classSize(classes.head._1)
    +
    +  // Returns the class name and instance name for the most recently added 
class.
    +  private def currClass(): (String, String) = classes.head
    +
    +  // Adds a new class. Requires the class' name, and its instance name.
    +  private def addClass(className: String, classInstance: String): Unit = {
    +    classes.prepend(Tuple2(className, classInstance))
    +    classSize += className -> 0
    +    classFunctions += className -> mutable.ListBuffer.empty[String]
    +  }
    +
    +  /**
    +   * Adds a function to the generated class. If the code for the 
`OuterClass` grows too large, the
    +   * function will be inlined into a new private, nested class, and a 
class-qualified name for the
    +   * function will be returned. Otherwise, the function will be inlined to 
the `OuterClass` and the
    +   * simple `funcName` will be returned.
    +   *
    +   * @param funcName the class-unqualified name of the function
    +   * @param funcCode the body of the function
    +   * @param inlineToOuterClass whether the given code must be inlined to 
the `OuterClass`. This
    +   *                           can be necessary when a function is 
declared outside of the context
    +   *                           it is eventually referenced and a returned 
qualified function name
    +   *                           cannot otherwise be accessed.
    +   * @return the name of the function, qualified by class if it will be 
inlined to a private,
    +   *         nested sub-class
    +   */
    +  def addNewFunction(
    +    funcName: String,
    +    funcCode: String,
    +    inlineToOuterClass: Boolean = false): String = {
    +    // The number of named constants that can exist in the class is 
limited by the Constant Pool
    +    // limit, 65,536. We cannot know how many constants will be inserted 
for a class, so we use a
    +    // threshold of 1600k bytes to determine when a function should be 
inlined to a private, nested
    +    // sub-class.
    +    val classInfo = if (inlineToOuterClass) {
    +      "OuterClass" -> ""
    +    } else if (currClassSize > 1600000) {
    +      val className = freshName("NestedClass")
    +      val classInstance = freshName("nestedClassInstance")
    +
    +      addClass(className, classInstance)
    +
    +      className -> classInstance
    +    } else {
    +      currClass()
    +    }
    +    val name = classInfo._1
    +
    +    classSize.update(name, classSize(name) + funcCode.length)
    +    classFunctions(name).append(funcCode)
    --- End diff --
    
    How about:
    
    ```scala
        classSize(name) += funcCode.length
        classFunctions(name) += funcCode
    ```



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to