Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/22326#discussion_r220272571 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala --- @@ -1304,10 +1307,27 @@ object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper { } } + /** + * Check if a join contains PythonUDF in join condition. + */ + def hasPythonUDFInJoinCondition(join: Join): Boolean = { + val conditions = join.condition.map(splitConjunctivePredicates).getOrElse(Nil) + conditions.exists(HandlePythonUDFInJoinCondition.hasPythonUDF) + } + def apply(plan: LogicalPlan): LogicalPlan = if (SQLConf.get.crossJoinEnabled) { plan } else plan transform { + case j @ Join(_, _, _, _) if hasPythonUDFInJoinCondition(j) => --- End diff -- If there are other conditions, the presence of a PythonUDF doesn't imply a Cartesian product. I think this should be a `havePythonUDFInAllConditions` check.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org