rednaxelafx commented on a change in pull request #24735: [SPARK-27871][SQL] LambdaVariable should use per-query unique IDs instead of globally unique IDs
URL: https://github.com/apache/spark/pull/24735#discussion_r288700780
 
 

 ##########
 File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
 ##########
 @@ -228,3 +228,31 @@ object ObjectSerializerPruning extends Rule[LogicalPlan] {
       }
   }
 }
+
+/**
+ * Reassigns per-query unique IDs to `LambdaVariable`s, whose original IDs are globally unique. This
+ * can help Spark to hit codegen cache more often and improve performance.
+ */
+object ReassignLambdaVariableID extends Rule[LogicalPlan] {
+  override def apply(plan: LogicalPlan): LogicalPlan = {
+    if (!SQLConf.get.getConf(SQLConf.OPTIMIZER_REASSIGN_LAMBDA_VARIABLE_ID)) return plan
+
+    // The original LambdaVariable IDs are all positive. To avoid conflicts, the new IDs are all
+    // negative and starts with -1.
 
 Review comment:
   Nit: "starts with" should read "starts from".

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to