This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 490fdf99e7 Minor: format comments in filter pushdown rule (#10437)
490fdf99e7 is described below

commit 490fdf99e77a02a2d5952cd6634c269d14d862b4
Author: Andrew Lamb <[email protected]>
AuthorDate: Fri May 10 15:13:28 2024 -0400

    Minor: format comments in filter pushdown rule (#10437)
---
 datafusion/optimizer/src/push_down_filter.rs | 147 ++++++++++++++-------------
 1 file changed, 74 insertions(+), 73 deletions(-)

diff --git a/datafusion/optimizer/src/push_down_filter.rs 
b/datafusion/optimizer/src/push_down_filter.rs
index f58345237b..9ce135b0d6 100644
--- a/datafusion/optimizer/src/push_down_filter.rs
+++ b/datafusion/optimizer/src/push_down_filter.rs
@@ -131,25 +131,25 @@ use crate::{OptimizerConfig, OptimizerRule};
 #[derive(Default)]
 pub struct PushDownFilter {}
 
-// For a given JOIN logical plan, determine whether each side of the join is 
preserved.
-// We say a join side is preserved if the join returns all or a subset of the 
rows from
-// the relevant side, such that each row of the output table directly maps to 
a row of
-// the preserved input table. If a table is not preserved, it can provide 
extra null rows.
-// That is, there may be rows in the output table that don't directly map to a 
row in the
-// input table.
-//
-// For example:
-//   - In an inner join, both sides are preserved, because each row of the 
output
-//     maps directly to a row from each side.
-//   - In a left join, the left side is preserved and the right is not, because
-//     there may be rows in the output that don't directly map to a row in the
-//     right input (due to nulls filling where there is no match on the right).
-//
-// This is important because we can always push down post-join filters to a 
preserved
-// side of the join, assuming the filter only references columns from that 
side. For the
-// non-preserved side it can be more tricky.
-//
-// Returns a tuple of booleans - (left_preserved, right_preserved).
+/// For a given JOIN logical plan, determine whether each side of the join is 
preserved.
+/// We say a join side is preserved if the join returns all or a subset of the 
rows from
+/// the relevant side, such that each row of the output table directly maps to 
a row of
+/// the preserved input table. If a table is not preserved, it can provide 
extra null rows.
+/// That is, there may be rows in the output table that don't directly map to 
a row in the
+/// input table.
+///
+/// For example:
+///   - In an inner join, both sides are preserved, because each row of the 
output
+///     maps directly to a row from each side.
+///   - In a left join, the left side is preserved and the right is not, 
because
+///     there may be rows in the output that don't directly map to a row in the
+///     right input (due to nulls filling where there is no match on the 
right).
+///
+/// This is important because we can always push down post-join filters to a 
preserved
+/// side of the join, assuming the filter only references columns from that 
side. For the
+/// non-preserved side it can be more tricky.
+///
+/// Returns a tuple of booleans - (left_preserved, right_preserved).
 fn lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
     match plan {
         LogicalPlan::Join(Join { join_type, .. }) => match join_type {
@@ -169,9 +169,10 @@ fn lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, 
bool)> {
     }
 }
 
-// For a given JOIN logical plan, determine whether each side of the join is 
preserved
-// in terms on join filtering.
-// Predicates from join filter can only be pushed to preserved join side.
+/// For a given JOIN logical plan, determine whether each side of the join is 
preserved
+/// in terms on join filtering.
+///
+/// Predicates from join filter can only be pushed to preserved join side.
 fn on_lr_is_preserved(plan: &LogicalPlan) -> Result<(bool, bool)> {
     match plan {
         LogicalPlan::Join(Join { join_type, .. }) => match join_type {
@@ -190,11 +191,11 @@ fn on_lr_is_preserved(plan: &LogicalPlan) -> 
Result<(bool, bool)> {
     }
 }
 
-// Determine which predicates in state can be pushed down to a given side of a 
join.
-// To determine this, we need to know the schema of the relevant join side and 
whether
-// or not the side's rows are preserved when joining. If the side is not 
preserved, we
-// do not push down anything. Otherwise we can push down predicates where all 
of the
-// relevant columns are contained on the relevant join side's schema.
+/// Determine which predicates in state can be pushed down to a given side of 
a join.
+/// To determine this, we need to know the schema of the relevant join side 
and whether
+/// or not the side's rows are preserved when joining. If the side is not 
preserved, we
+/// do not push down anything. Otherwise we can push down predicates where all 
of the
+/// relevant columns are contained on the relevant join side's schema.
 fn can_pushdown_join_predicate(predicate: &Expr, schema: &DFSchema) -> 
Result<bool> {
     let schema_columns = schema
         .iter()
@@ -215,7 +216,7 @@ fn can_pushdown_join_predicate(predicate: &Expr, schema: 
&DFSchema) -> Result<bo
         == columns.len())
 }
 
-// Determine whether the predicate can evaluate as the join conditions
+/// Determine whether the predicate can evaluate as the join conditions
 fn can_evaluate_as_join_condition(predicate: &Expr) -> Result<bool> {
     let mut is_evaluate = true;
     predicate.apply(|expr| match expr {
@@ -261,39 +262,39 @@ fn can_evaluate_as_join_condition(predicate: &Expr) -> 
Result<bool> {
     Ok(is_evaluate)
 }
 
-// examine OR clause to see if any useful clauses can be extracted and push 
down.
-// extract at least one qual from each sub clauses of OR clause, then form the 
quals
-// to new OR clause as predicate.
-//
-// Filter: (a = c and a < 20) or (b = d and b > 10)
-//     join/crossjoin:
-//          TableScan: projection=[a, b]
-//          TableScan: projection=[c, d]
-//
-// is optimized to
-//
-// Filter: (a = c and a < 20) or (b = d and b > 10)
-//     join/crossjoin:
-//          Filter: (a < 20) or (b > 10)
-//              TableScan: projection=[a, b]
-//          TableScan: projection=[c, d]
-//
-// In general, predicates of this form:
-//
-// (A AND B) OR (C AND D)
-//
-// will be transformed to
-//
-// ((A AND B) OR (C AND D)) AND (A OR C)
-//
-// OR
-//
-// ((A AND B) OR (C AND D)) AND ((A AND B) OR C)
-//
-// OR
-//
-// do nothing.
-//
+/// examine OR clause to see if any useful clauses can be extracted and push 
down.
+/// extract at least one qual from each sub clauses of OR clause, then form 
the quals
+/// to new OR clause as predicate.
+///
+/// # Example
+/// ```text
+/// Filter: (a = c and a < 20) or (b = d and b > 10)
+///     join/crossjoin:
+///          TableScan: projection=[a, b]
+///          TableScan: projection=[c, d]
+/// ```
+///
+/// is optimized to
+///
+/// ```text
+/// Filter: (a = c and a < 20) or (b = d and b > 10)
+///     join/crossjoin:
+///          Filter: (a < 20) or (b > 10)
+///              TableScan: projection=[a, b]
+///          TableScan: projection=[c, d]
+/// ```
+///
+/// In general, predicates of this form:
+///
+/// ```sql
+/// (A AND B) OR (C AND D)
+/// ```
+///
+/// will be transformed to one of:
+///
+/// * `((A AND B) OR (C AND D)) AND (A OR C)`
+/// * `((A AND B) OR (C AND D)) AND ((A AND B) OR C)`
+/// * do nothing.
 fn extract_or_clauses_for_join<'a>(
     filters: &'a [Expr],
     schema: &'a DFSchema,
@@ -329,17 +330,17 @@ fn extract_or_clauses_for_join<'a>(
     })
 }
 
-// extract qual from OR sub-clause.
-//
-// A qual is extracted if it only contains set of column references in 
schema_columns.
-//
-// For AND clause, we extract from both sub-clauses, then make new AND clause 
by extracted
-// clauses if both extracted; Otherwise, use the extracted clause from any 
sub-clauses or None.
-//
-// For OR clause, we extract from both sub-clauses, then make new OR clause by 
extracted clauses if both extracted;
-// Otherwise, return None.
-//
-// For other clause, apply the rule above to extract clause.
+/// extract qual from OR sub-clause.
+///
+/// A qual is extracted if it only contains set of column references in 
schema_columns.
+///
+/// For AND clause, we extract from both sub-clauses, then make new AND clause 
by extracted
+/// clauses if both extracted; Otherwise, use the extracted clause from any 
sub-clauses or None.
+///
+/// For OR clause, we extract from both sub-clauses, then make new OR clause 
by extracted clauses if both extracted;
+/// Otherwise, return None.
+///
+/// For other clause, apply the rule above to extract clause.
 fn extract_or_clause(expr: &Expr, schema_columns: &HashSet<Column>) -> 
Option<Expr> {
     let mut predicate = None;
 
@@ -396,7 +397,7 @@ fn extract_or_clause(expr: &Expr, schema_columns: 
&HashSet<Column>) -> Option<Ex
     predicate
 }
 
-// push down join/cross-join
+/// push down join/cross-join
 fn push_down_all_join(
     predicates: Vec<Expr>,
     infer_predicates: Vec<Expr>,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to