This is an automated email from the ASF dual-hosted git repository.

ozankabak pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 1d4f329984 During expression equality, check for new ordering 
information (#10434)
1d4f329984 is described below

commit 1d4f329984d0ef8486d7cf36ff3c5f84348fae84
Author: Mustafa Akur <[email protected]>
AuthorDate: Fri May 10 13:09:17 2024 +0300

    During expression equality, check for new ordering information (#10434)
    
    * Move ordering discovery to equality check
    
    * Minor changes
    
    * Minor changes
    
    * Apply suggestions from code review
    
    Co-authored-by: Mehmet Ozan Kabak <[email protected]>
    
    ---------
    
    Co-authored-by: Mehmet Ozan Kabak <[email protected]>
---
 datafusion/physical-expr/src/equivalence/mod.rs    | 44 +-------------
 .../physical-expr/src/equivalence/properties.rs    | 67 +++++++++++++++++++++-
 2 files changed, 67 insertions(+), 44 deletions(-)

diff --git a/datafusion/physical-expr/src/equivalence/mod.rs 
b/datafusion/physical-expr/src/equivalence/mod.rs
index 7fc27172e4..3ce641c5aa 100644
--- a/datafusion/physical-expr/src/equivalence/mod.rs
+++ b/datafusion/physical-expr/src/equivalence/mod.rs
@@ -18,7 +18,6 @@
 use std::sync::Arc;
 
 use crate::expressions::Column;
-use crate::sort_properties::SortProperties;
 use crate::{LexRequirement, PhysicalExpr, PhysicalSortRequirement};
 
 use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
@@ -47,48 +46,7 @@ pub fn collapse_lex_req(input: LexRequirement) -> 
LexRequirement {
             output.push(item);
         }
     }
-    collapse_monotonic_lex_req(output)
-}
-
-/// This function constructs a normalized [`LexRequirement`] by filtering out 
entries
-/// that are ordered if the next entry is.
-/// Used in `collapse_lex_req`
-fn collapse_monotonic_lex_req(input: LexRequirement) -> LexRequirement {
-    input
-        .iter()
-        .enumerate()
-        .filter_map(|(i, item)| {
-            // If it's the last entry, there is no next entry
-            if i == input.len() - 1 {
-                return Some(item);
-            }
-            let next_expr = &input[i + 1];
-
-            // Only handle expressions with exactly one child
-            // TODO: it should be possible to handle expressions orderings 
f(a, b, c), a, b, c
-            // if f is monotonic in all arguments
-            if !(item.expr.children().len() == 1
-                && item.expr.children()[0].eq(&next_expr.expr))
-            {
-                return Some(item);
-            }
-
-            let opts = match next_expr.options {
-                None => return Some(item),
-                Some(opts) => opts,
-            };
-
-            if item.options.map(SortProperties::Ordered)
-                == 
Some(item.expr.get_ordering(&[SortProperties::Ordered(opts)]))
-            {
-                // Remove the redundant sort
-                return None;
-            }
-
-            Some(item)
-        })
-        .cloned()
-        .collect::<Vec<_>>()
+    output
 }
 
 /// Adds the `offset` value to `Column` indices inside `expr`. This function is
diff --git a/datafusion/physical-expr/src/equivalence/properties.rs 
b/datafusion/physical-expr/src/equivalence/properties.rs
index 555f0ad317..c654208208 100644
--- a/datafusion/physical-expr/src/equivalence/properties.rs
+++ b/datafusion/physical-expr/src/equivalence/properties.rs
@@ -198,6 +198,61 @@ impl EquivalenceProperties {
         left: &Arc<dyn PhysicalExpr>,
         right: &Arc<dyn PhysicalExpr>,
     ) {
+        // Discover new constants in light of new the equality:
+        if self.is_expr_constant(left) {
+            // Left expression is constant, add right as constant
+            if !physical_exprs_contains(&self.constants, right) {
+                self.constants.push(right.clone());
+            }
+        } else if self.is_expr_constant(right) {
+            // Right expression is constant, add left as constant
+            if !physical_exprs_contains(&self.constants, left) {
+                self.constants.push(left.clone());
+            }
+        }
+
+        // Discover new valid orderings in light of the new equality. For a 
discussion, see:
+        // https://github.com/apache/datafusion/issues/9812
+        let mut new_orderings = vec![];
+        for ordering in self.normalized_oeq_class().iter() {
+            let expressions = if left.eq(&ordering[0].expr) {
+                // left expression is leading ordering
+                Some((ordering[0].options, right))
+            } else if right.eq(&ordering[0].expr) {
+                // right expression is leading ordering
+                Some((ordering[0].options, left))
+            } else {
+                None
+            };
+            if let Some((leading_ordering, other_expr)) = expressions {
+                // Only handle expressions with exactly one child
+                // TODO: it should be possible to handle expressions orderings 
f(a, b, c), a, b, c
+                // if f is monotonic in all arguments
+                // First Expression after leading ordering
+                if let Some(next_expr) = ordering.get(1) {
+                    let children = other_expr.children();
+                    if children.len() == 1
+                        && children[0].eq(&next_expr.expr)
+                        && SortProperties::Ordered(leading_ordering)
+                            == 
other_expr.get_ordering(&[SortProperties::Ordered(
+                                next_expr.options,
+                            )])
+                    {
+                        // Assume existing ordering is [a ASC, b ASC]
+                        // When equality a = f(b) is given, If we know that 
given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid,
+                        // then we can deduce that ordering `[b ASC]` is also 
valid.
+                        // Hence, ordering `[b ASC]` can be added to the state 
as valid ordering.
+                        // (e.g. existing ordering where leading ordering is 
removed)
+                        new_orderings.push(ordering[1..].to_vec());
+                    }
+                }
+            }
+        }
+        if !new_orderings.is_empty() {
+            self.oeq_class.add_new_orderings(new_orderings);
+        }
+
+        // Add equal expressions to the state
         self.eq_group.add_equal_conditions(left, right);
     }
 
@@ -2250,11 +2305,21 @@ mod tests {
             TestCase {
                 name: "(a, b, c) -> (c)",
                 // b is constant, so it should be removed from the sort order
-                constants: vec![col_b],
+                constants: vec![col_b.clone()],
                 equal_conditions: vec![[cast_c.clone(), col_a.clone()]],
                 sort_columns: &["c"],
                 should_satisfy_ordering: true,
             },
+            // Same test with above test, where equality order is swapped.
+            // Algorithm shouldn't depend on this order.
+            TestCase {
+                name: "(a, b, c) -> (c)",
+                // b is constant, so it should be removed from the sort order
+                constants: vec![col_b],
+                equal_conditions: vec![[col_a.clone(), cast_c.clone()]],
+                sort_columns: &["c"],
+                should_satisfy_ordering: true,
+            },
             TestCase {
                 name: "not ordered because (b) is not constant",
                 // b is not constant anymore


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to