alamb commented on code in PR #10430: URL: https://github.com/apache/datafusion/pull/10430#discussion_r1594715297
########## datafusion/optimizer/src/eliminate_cross_join.rs: ########## @@ -250,90 +265,67 @@ fn find_inner_join( })) } -fn intersect( - accum: &mut Vec<(Expr, Expr)>, - vec1: &[(Expr, Expr)], - vec2: &[(Expr, Expr)], -) { - if !(vec1.is_empty() || vec2.is_empty()) { - for x1 in vec1.iter() { - for x2 in vec2.iter() { - if x1.0 == x2.0 && x1.1 == x2.1 || x1.1 == x2.0 && x1.0 == x2.1 { - accum.push((x1.0.clone(), x1.1.clone())); - } - } - } - } -} - /// Extract join keys from a WHERE clause -fn extract_possible_join_keys(expr: &Expr, accum: &mut Vec<(Expr, Expr)>) -> Result<()> { +fn extract_possible_join_keys(expr: &Expr, join_keys: &mut JoinKeySet) { if let Expr::BinaryExpr(BinaryExpr { left, op, right }) = expr { match op { Operator::Eq => { - // Ensure that we don't add the same Join keys multiple times - if !(accum.contains(&(*left.clone(), *right.clone())) - || accum.contains(&(*right.clone(), *left.clone()))) - { - accum.push((*left.clone(), *right.clone())); - } + // insert handles ensuring we don't add the same Join keys multiple times + join_keys.insert(left, right); } Operator::And => { - extract_possible_join_keys(left, accum)?; - extract_possible_join_keys(right, accum)? + extract_possible_join_keys(left, join_keys); + extract_possible_join_keys(right, join_keys) } // Fix for issue#78 join predicates from inside of OR expr also pulled up properly. Operator::Or => { - let mut left_join_keys = vec![]; - let mut right_join_keys = vec![]; + let mut left_join_keys = JoinKeySet::new(); + let mut right_join_keys = JoinKeySet::new(); - extract_possible_join_keys(left, &mut left_join_keys)?; - extract_possible_join_keys(right, &mut right_join_keys)?; + extract_possible_join_keys(left, &mut left_join_keys); + extract_possible_join_keys(right, &mut right_join_keys); - intersect(accum, &left_join_keys, &right_join_keys) + join_keys.insert_intersection(left_join_keys, right_join_keys) } _ => (), }; } - Ok(()) } /// Remove join expressions from a filter expression -/// Returns Some() when there are few remaining predicates in filter_expr -/// Returns None otherwise -fn remove_join_expressions( - expr: &Expr, - join_keys: &HashSet<(Expr, Expr)>, -) -> Result<Option<Expr>> { +/// +/// # Returns +/// * `Some()` when there are few remaining predicates in filter_expr +/// * `None` otherwise +fn remove_join_expressions(expr: Expr, join_keys: &JoinKeySet) -> Option<Expr> { Review Comment: This API previously cloned on all paths (including to check join keys) -- the new API does not Also it never returns `Err` so I changed the signature to `Option` from `Result<Option<..>>` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org