retronym commented on a change in pull request #28463:
URL: https://github.com/apache/spark/pull/28463#discussion_r422014834



##########
File path: core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala
##########
@@ -414,6 +434,284 @@ private[spark] object ClosureCleaner extends Logging {
   }
 }
 
+private[spark] object IndylambdaScalaClosures extends Logging {
+  // internal name of java.lang.invoke.LambdaMetafactory
+  val LambdaMetafactoryClassName = "java/lang/invoke/LambdaMetafactory"
+  // the method that Scala indylambda use for bootstrap method
+  val LambdaMetafactoryMethodName = "altMetafactory"
+  val LambdaMetafactoryMethodDesc = "(Ljava/lang/invoke/MethodHandles$Lookup;" 
+
+    "Ljava/lang/String;Ljava/lang/invoke/MethodType;[Ljava/lang/Object;)" +
+    "Ljava/lang/invoke/CallSite;"
+
+  /**
+   * Check if the given reference is an indylambda style Scala closure.
+   * If so, return a non-empty serialization proxy (SerializedLambda) of the 
closure;
+   * otherwise return None.
+   *
+   * @param maybeClosure the closure to check.
+   */
+  def getSerializationProxy(maybeClosure: AnyRef): Option[SerializedLambda] = {
+    val maybeClosureClass = maybeClosure.getClass
+
+    // shortcut the fast check:
+    // 1. indylambda closure classes are generated by Java's 
LambdaMetafactory, and they're always
+    //    synthetic.
+    // 2. We only care about Serializable closures, so let's check that as well
+    if (!maybeClosureClass.isSynthetic || 
!maybeClosure.isInstanceOf[Serializable]) return None
+
+    val implementedInterfaces = 
ClassUtils.getAllInterfaces(maybeClosureClass).asScala
+    val isClosureCandidate = 
implementedInterfaces.exists(_.getName.startsWith("scala.Function"))
+
+    if (isClosureCandidate) {
+      try {
+        val lambdaProxy = inspect(maybeClosure)
+        if (isIndylambdaScalaClosure(lambdaProxy)) Option(lambdaProxy)
+        else None
+      } catch {
+        case e: Exception =>
+          // no need to check if debug is enabled here the Spark logging api 
covers this.
+          logDebug("The given reference is not an indylambda Scala closure.", 
e)
+          None
+      }
+    } else {
+      None
+    }
+  }
+
+  def isIndylambdaScalaClosure(lambdaProxy: SerializedLambda): Boolean = {
+    lambdaProxy.getImplMethodKind == MethodHandleInfo.REF_invokeStatic &&
+      lambdaProxy.getImplMethodName.contains("$anonfun$")
+  }
+
+  def inspect(closure: AnyRef): SerializedLambda = {
+    val writeReplace = closure.getClass.getDeclaredMethod("writeReplace")
+    writeReplace.setAccessible(true)
+    writeReplace.invoke(closure).asInstanceOf[SerializedLambda]
+  }
+
+  /**
+   * Check if the handle represents the LambdaMetafactory that indylambda 
Scala closures
+   * use for creating the lambda class and getting a closure instance.
+   */
+  def isLambdaMetafactory(bsmHandle: Handle): Boolean = {
+    bsmHandle.getOwner == LambdaMetafactoryClassName &&
+      bsmHandle.getName == LambdaMetafactoryMethodName &&
+      bsmHandle.getDesc == LambdaMetafactoryMethodDesc
+  }
+
+  /**
+   * Check if the handle represents a target method that is:
+   * - a STATIC method that implements a Scala lambda body in the indylambda 
style
+   * - captures the enclosing `this`, i.e. the first argument is a reference 
to the same type as
+   *   the owning class.
+   * Returns true if both criteria above are met.
+   */
+  def isLambdaBodyCapturingOuter(handle: Handle, ownerInternalName: String): 
Boolean = {
+    handle.getTag == H_INVOKESTATIC &&
+      handle.getName.contains("$anonfun$") &&
+      handle.getOwner == ownerInternalName &&
+      handle.getDesc.startsWith(s"(L$ownerInternalName;")
+  }
+
+  /**
+   * Check if the callee of a call site is an inner class constructor.
+   * - A constructor has to be invoked via INVOKESPECIAL
+   * - A constructor's internal name is "<init>" and the return type is "V" 
(void)
+   * - An inner class' first argument in the signature has to be a reference 
to the
+   *   enclosing "this", aka `$outer` in Scala.
+   */
+  def isInnerClassCtorCapturingOuter(
+      op: Int, owner: String, name: String, desc: String, callerInternalName: 
String): Boolean = {
+    op == INVOKESPECIAL && name == "<init>" && 
desc.startsWith(s"(L$callerInternalName;")
+  }
+
+  // scalastyle:off line.size.limit
+  /**
+   * Scans an indylambda Scala closure, along with its lexically nested 
closures, and populate
+   * the accessed fields info on which fields on the outer object are accessed.
+   *
+   * Example: run the following code snippet in a Spark Shell w/ Scala 2.12+:

Review comment:
       Looks good!
   
   I'd appreciate it if (as a followup task) you could update to
   [Scala 2.12.11](https://github.com/scala/scala/releases/tag/v2.12.11).
   
   I'd hope you could turn on `-Yrepl-use-magic-imports` by default in the
   Spark REPL. I tested those changes in the Spark build, so I expect things
   to work fine.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to