rednaxelafx commented on a change in pull request #28463:
URL: https://github.com/apache/spark/pull/28463#discussion_r422007067



##########
File path: core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
##########
@@ -414,6 +434,284 @@ private[spark] object ClosureCleaner extends Logging {
   }
 }
 
+private[spark] object IndylambdaScalaClosures extends Logging {
+  // internal name of java.lang.invoke.LambdaMetafactory
+  val LambdaMetafactoryClassName = "java/lang/invoke/LambdaMetafactory"
+  // the method that Scala indylambda uses as its bootstrap method
+  val LambdaMetafactoryMethodName = "altMetafactory"
+  val LambdaMetafactoryMethodDesc = "(Ljava/lang/invoke/MethodHandles$Lookup;" 
+
+    "Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)" +
+    "Ljava/lang/invoke/CallSite;"
+
+  /**
+   * Check if the given reference is an indylambda style Scala closure.
+   * If so, return a non-empty serialization proxy (SerializedLambda) of the 
closure;
+   * otherwise return None.
+   *
+   * @param maybeClosure the closure to check.
+   */
+  def getSerializationProxy(maybeClosure: AnyRef): Option[SerializedLambda] = {
+    val maybeClosureClass = maybeClosure.getClass
+
+    // shortcut the fast check:
+    // 1. indylambda closure classes are generated by Java's 
LambdaMetafactory, and they're always
+    //    synthetic.
+    // 2. We only care about Serializable closures, so let's check that as well
+    if (!maybeClosureClass.isSynthetic || 
!maybeClosure.isInstanceOf[Serializable]) return None
+
+    val implementedInterfaces = 
ClassUtils.getAllInterfaces(maybeClosureClass).asScala
+    val isClosureCandidate = 
implementedInterfaces.exists(_.getName.startsWith("scala.Function"))
+
+    if (isClosureCandidate) {
+      try {
+        val lambdaProxy = inspect(maybeClosure)
+        if (isIndylambdaScalaClosure(lambdaProxy)) Option(lambdaProxy)
+        else None
+      } catch {
+        case e: Exception =>
+          // no need to check if debug is enabled here; the Spark logging API 
covers this.
+          logDebug("The given reference is not an indylambda Scala closure.", 
e)
+          None
+      }
+    } else {
+      None
+    }
+  }
+
+  def isIndylambdaScalaClosure(lambdaProxy: SerializedLambda): Boolean = {
+    lambdaProxy.getImplMethodKind == MethodHandleInfo.REF_invokeStatic &&
+      lambdaProxy.getImplMethodName.contains("$anonfun$")
+  }
+
+  def inspect(closure: AnyRef): SerializedLambda = {
+    val writeReplace = closure.getClass.getDeclaredMethod("writeReplace")
+    writeReplace.setAccessible(true)
+    writeReplace.invoke(closure).asInstanceOf[SerializedLambda]
+  }
+
+  /**
+   * Check if the handle represents the LambdaMetafactory that indylambda 
Scala closures
+   * use for creating the lambda class and getting a closure instance.
+   */
+  def isLambdaMetafactory(bsmHandle: Handle): Boolean = {
+    bsmHandle.getOwner == LambdaMetafactoryClassName &&
+      bsmHandle.getName == LambdaMetafactoryMethodName &&
+      bsmHandle.getDesc == LambdaMetafactoryMethodDesc
+  }
+
+  /**
+   * Check if the handle represents a target method that is:
+   * - a STATIC method that implements a Scala lambda body in the indylambda 
style
+   * - captures the enclosing `this`, i.e. the first argument is a reference 
to the same type as
+   *   the owning class.
+   * Returns true if both criteria above are met.
+   */
+  def isLambdaBodyCapturingOuter(handle: Handle, ownerInternalName: String): 
Boolean = {
+    handle.getTag == H_INVOKESTATIC &&
+      handle.getName.contains("$anonfun$") &&
+      handle.getOwner == ownerInternalName &&
+      handle.getDesc.startsWith(s"(L$ownerInternalName;")
+  }
+
+  /**
+   * Check if the callee of a call site is an inner class constructor.
+   * - A constructor has to be invoked via INVOKESPECIAL
+   * - A constructor's internal name is "<init>" and the return type is "V" 
(void)
+   * - An inner class' first argument in the signature has to be a reference 
to the
+   *   enclosing "this", aka `$outer` in Scala.
+   */
+  def isInnerClassCtorCapturingOuter(
+      op: Int, owner: String, name: String, desc: String, callerInternalName: 
String): Boolean = {
+    op == INVOKESPECIAL && name == "<init>" && 
desc.startsWith(s"(L$callerInternalName;")
+  }
+
+  // scalastyle:off line.size.limit
+  /**
+   * Scans an indylambda Scala closure, along with its lexically nested 
closures, and populates
+   * the accessed fields info on which fields on the outer object are accessed.
+   *
+   * Example: run the following code snippet in a Spark Shell w/ Scala 2.12+:

Review comment:
       `-Yrepl-use-magic-imports` doesn't seem to be available in the Scala 
version that Spark master is using (Scala 2.12.10). I can only get `bad option: 
'-Yrepl-use-magic-imports:true'`.
   
   The bits that I put into the comment already omitted the `ignoring` lines. 
It's just the nested `$iw`s and the super long lines from the `invokedynamic` 
logging that make this trace super wide...
   
   I've updated the PR again to make the example logs slightly less annoying. 
Please help take another look, thanks!




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to