alamb commented on code in PR #10431:
URL: https://github.com/apache/datafusion/pull/10431#discussion_r1598446148


##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -39,102 +41,147 @@ impl EliminateCrossJoin {
     }
 }
 
-/// Attempt to reorder join to eliminate cross joins to inner joins.
-/// for queries:
-/// 'select ... from a, b where a.x = b.y and b.xx = 100;'
-/// 'select ... from a, b where (a.x = b.y and b.xx = 100) or (a.x = b.y and 
b.xx = 200);'
-/// 'select ... from a, b, c where (a.x = b.y and b.xx = 100 and a.z = c.z)
-/// or (a.x = b.y and b.xx = 200 and a.z=c.z);'
-/// 'select ... from a, b where a.x > b.y'
+/// Eliminate cross joins by rewriting them to inner joins when possible.
+///
+/// # Example
+/// The initial plan for this query:
+/// ```sql
+/// select ... from a, b where a.x = b.y and b.xx = 100;
+/// ```
+///
+/// Looks like this:
+/// ```text
+/// Filter(a.x = b.y AND b.xx = 100)
+///  CrossJoin
+///   TableScan a
+///   TableScan b
+/// ```
+///
+/// After the rule is applied, the plan will look like this:
+/// ```text
+/// Filter(b.xx = 100)
+///   InnerJoin(a.x = b.y)
+///     TableScan a
+///     TableScan b
+/// ```
+///
+/// # Other Examples
+/// * 'select ... from a, b where a.x = b.y and b.xx = 100;'
+/// * 'select ... from a, b where (a.x = b.y and b.xx = 100) or (a.x = b.y and 
b.xx = 200);'
+/// * 'select ... from a, b, c where (a.x = b.y and b.xx = 100 and a.z = c.z)
+/// * or (a.x = b.y and b.xx = 200 and a.z=c.z);'
+/// * 'select ... from a, b where a.x > b.y'
+///
 /// For above queries, the join predicate is available in filters and they are 
moved to
 /// join nodes appropriately
+///
 /// This fix helps to improve the performance of TPCH Q19. issue#78
 impl OptimizerRule for EliminateCrossJoin {
     fn try_optimize(
         &self,
-        plan: &LogicalPlan,
-        config: &dyn OptimizerConfig,
+        _plan: &LogicalPlan,
+        _config: &dyn OptimizerConfig,
     ) -> Result<Option<LogicalPlan>> {
+        internal_err!("Should have called EliminateCrossJoin::rewrite")
+    }
+
+    fn supports_rewrite(&self) -> bool {
+        true
+    }
+
+    fn rewrite(
+        &self,
+        plan: LogicalPlan,
+        config: &dyn OptimizerConfig,
+    ) -> Result<Transformed<LogicalPlan>> {
+        let plan_schema = plan.schema().clone();
         let mut possible_join_keys = JoinKeySet::new();
         let mut all_inputs: Vec<LogicalPlan> = vec![];
-        let parent_predicate = match plan {
-            LogicalPlan::Filter(filter) => {
-                let input = filter.input.as_ref();
-                match input {
-                    LogicalPlan::Join(Join {
-                        join_type: JoinType::Inner,
-                        ..
-                    })
-                    | LogicalPlan::CrossJoin(_) => {
-                        if !try_flatten_join_inputs(
-                            input,
-                            &mut possible_join_keys,
-                            &mut all_inputs,
-                        )? {
-                            return Ok(None);
-                        }
-                        extract_possible_join_keys(
-                            &filter.predicate,
-                            &mut possible_join_keys,
-                        );
-                        Some(&filter.predicate)
-                    }
-                    _ => {
-                        return utils::optimize_children(self, plan, config);
-                    }
-                }
+
+        let can_flatten_inputs = can_flatten_join_inputs(&plan);

Review Comment:
   this took a fair amount of finagling to split up the cases where the rewrite 
will happen and thus we should destructure the Filter and when not. I believe 
the logic is all the same, but the code needed to be reorganized



##########
datafusion/optimizer/src/eliminate_cross_join.rs:
##########
@@ -39,102 +41,147 @@ impl EliminateCrossJoin {
     }
 }
 
-/// Attempt to reorder join to eliminate cross joins to inner joins.
-/// for queries:
-/// 'select ... from a, b where a.x = b.y and b.xx = 100;'
-/// 'select ... from a, b where (a.x = b.y and b.xx = 100) or (a.x = b.y and 
b.xx = 200);'
-/// 'select ... from a, b, c where (a.x = b.y and b.xx = 100 and a.z = c.z)
-/// or (a.x = b.y and b.xx = 200 and a.z=c.z);'
-/// 'select ... from a, b where a.x > b.y'
+/// Eliminate cross joins by rewriting them to inner joins when possible.

Review Comment:
   While I was in here and had it all in my head, I updated the documentation 
to explain how things worked



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to