This is an automated email from the ASF dual-hosted git repository.
ozankabak pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 1d4f329984 During expression equality, check for new ordering
information (#10434)
1d4f329984 is described below
commit 1d4f329984d0ef8486d7cf36ff3c5f84348fae84
Author: Mustafa Akur <[email protected]>
AuthorDate: Fri May 10 13:09:17 2024 +0300
During expression equality, check for new ordering information (#10434)
* Move ordering discovery to equality check
* Minor changes
* Minor changes
* Apply suggestions from code review
Co-authored-by: Mehmet Ozan Kabak <[email protected]>
---------
Co-authored-by: Mehmet Ozan Kabak <[email protected]>
---
datafusion/physical-expr/src/equivalence/mod.rs | 44 +-------------
.../physical-expr/src/equivalence/properties.rs | 67 +++++++++++++++++++++-
2 files changed, 67 insertions(+), 44 deletions(-)
diff --git a/datafusion/physical-expr/src/equivalence/mod.rs
b/datafusion/physical-expr/src/equivalence/mod.rs
index 7fc27172e4..3ce641c5aa 100644
--- a/datafusion/physical-expr/src/equivalence/mod.rs
+++ b/datafusion/physical-expr/src/equivalence/mod.rs
@@ -18,7 +18,6 @@
use std::sync::Arc;
use crate::expressions::Column;
-use crate::sort_properties::SortProperties;
use crate::{LexRequirement, PhysicalExpr, PhysicalSortRequirement};
use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode};
@@ -47,48 +46,7 @@ pub fn collapse_lex_req(input: LexRequirement) ->
LexRequirement {
output.push(item);
}
}
- collapse_monotonic_lex_req(output)
-}
-
-/// This function constructs a normalized [`LexRequirement`] by filtering out
entries
-/// that are ordered if the next entry is.
-/// Used in `collapse_lex_req`
-fn collapse_monotonic_lex_req(input: LexRequirement) -> LexRequirement {
- input
- .iter()
- .enumerate()
- .filter_map(|(i, item)| {
- // If it's the last entry, there is no next entry
- if i == input.len() - 1 {
- return Some(item);
- }
- let next_expr = &input[i + 1];
-
- // Only handle expressions with exactly one child
- // TODO: it should be possible to handle expressions orderings
f(a, b, c), a, b, c
- // if f is monotonic in all arguments
- if !(item.expr.children().len() == 1
- && item.expr.children()[0].eq(&next_expr.expr))
- {
- return Some(item);
- }
-
- let opts = match next_expr.options {
- None => return Some(item),
- Some(opts) => opts,
- };
-
- if item.options.map(SortProperties::Ordered)
- ==
Some(item.expr.get_ordering(&[SortProperties::Ordered(opts)]))
- {
- // Remove the redundant sort
- return None;
- }
-
- Some(item)
- })
- .cloned()
- .collect::<Vec<_>>()
+ output
}
/// Adds the `offset` value to `Column` indices inside `expr`. This function is
diff --git a/datafusion/physical-expr/src/equivalence/properties.rs
b/datafusion/physical-expr/src/equivalence/properties.rs
index 555f0ad317..c654208208 100644
--- a/datafusion/physical-expr/src/equivalence/properties.rs
+++ b/datafusion/physical-expr/src/equivalence/properties.rs
@@ -198,6 +198,61 @@ impl EquivalenceProperties {
left: &Arc<dyn PhysicalExpr>,
right: &Arc<dyn PhysicalExpr>,
) {
+ // Discover new constants in light of new the equality:
+ if self.is_expr_constant(left) {
+ // Left expression is constant, add right as constant
+ if !physical_exprs_contains(&self.constants, right) {
+ self.constants.push(right.clone());
+ }
+ } else if self.is_expr_constant(right) {
+ // Right expression is constant, add left as constant
+ if !physical_exprs_contains(&self.constants, left) {
+ self.constants.push(left.clone());
+ }
+ }
+
+ // Discover new valid orderings in light of the new equality. For a
discussion, see:
+ // https://github.com/apache/datafusion/issues/9812
+ let mut new_orderings = vec![];
+ for ordering in self.normalized_oeq_class().iter() {
+ let expressions = if left.eq(&ordering[0].expr) {
+ // left expression is leading ordering
+ Some((ordering[0].options, right))
+ } else if right.eq(&ordering[0].expr) {
+ // right expression is leading ordering
+ Some((ordering[0].options, left))
+ } else {
+ None
+ };
+ if let Some((leading_ordering, other_expr)) = expressions {
+ // Only handle expressions with exactly one child
+ // TODO: it should be possible to handle expressions orderings
f(a, b, c), a, b, c
+ // if f is monotonic in all arguments
+ // First Expression after leading ordering
+ if let Some(next_expr) = ordering.get(1) {
+ let children = other_expr.children();
+ if children.len() == 1
+ && children[0].eq(&next_expr.expr)
+ && SortProperties::Ordered(leading_ordering)
+ ==
other_expr.get_ordering(&[SortProperties::Ordered(
+ next_expr.options,
+ )])
+ {
+ // Assume existing ordering is [a ASC, b ASC]
+ // When equality a = f(b) is given, If we know that
given ordering `[b ASC]`, ordering `[f(b) ASC]` is valid,
+ // then we can deduce that ordering `[b ASC]` is also
valid.
+ // Hence, ordering `[b ASC]` can be added to the state
as valid ordering.
+ // (e.g. existing ordering where leading ordering is
removed)
+ new_orderings.push(ordering[1..].to_vec());
+ }
+ }
+ }
+ }
+ if !new_orderings.is_empty() {
+ self.oeq_class.add_new_orderings(new_orderings);
+ }
+
+ // Add equal expressions to the state
self.eq_group.add_equal_conditions(left, right);
}
@@ -2250,11 +2305,21 @@ mod tests {
TestCase {
name: "(a, b, c) -> (c)",
// b is constant, so it should be removed from the sort order
- constants: vec![col_b],
+ constants: vec![col_b.clone()],
equal_conditions: vec![[cast_c.clone(), col_a.clone()]],
sort_columns: &["c"],
should_satisfy_ordering: true,
},
+ // Same test with above test, where equality order is swapped.
+ // Algorithm shouldn't depend on this order.
+ TestCase {
+ name: "(a, b, c) -> (c)",
+ // b is constant, so it should be removed from the sort order
+ constants: vec![col_b],
+ equal_conditions: vec![[col_a.clone(), cast_c.clone()]],
+ sort_columns: &["c"],
+ should_satisfy_ordering: true,
+ },
TestCase {
name: "not ordered because (b) is not constant",
// b is not constant anymore
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]