alamb commented on code in PR #17404:
URL: https://github.com/apache/datafusion/pull/17404#discussion_r2322384067


##########
datafusion/physical-expr/src/equivalence/properties/mod.rs:
##########
@@ -255,10 +255,11 @@ impl EquivalenceProperties {
     pub fn constants(&self) -> Vec<ConstExpr> {
         self.eq_group
             .iter()
-            .filter_map(|c| {
-                c.constant.as_ref().and_then(|across| {
-                    c.canonical_expr()
-                        .map(|expr| ConstExpr::new(Arc::clone(expr), 
across.clone()))
+            .flat_map(|c| {

Review Comment:
   The bug fix here is that this method now adds an entry for each equivalent 
expression rather than just the first "canonical" one
   
   For example, if the equivalent expressions are `(a, b)` and the constant is 
known to be 1, now the code will return
   `a=1, b=1` but before the fix it would only return `a=1`)



##########
datafusion/core/tests/physical_optimizer/sanity_checker.rs:
##########
@@ -665,3 +666,77 @@ async fn test_sort_merge_join_dist_missing() -> Result<()> 
{
     assert_sanity_check(&smj, false);
     Ok(())
 }
+
+/// A particular edge case.
+///
+/// See <https://github.com/apache/datafusion/issues/17372>.
+#[tokio::test]
+async fn test_union_with_sorts_and_constants() -> Result<()> {
+    let schema_in = create_test_schema2();
+
+    let proj_exprs_1 = vec![
+        (
+            Arc::new(Literal::new(ScalarValue::Utf8(Some("foo".to_owned())))) 
as _,
+            "const_1".to_owned(),
+        ),
+        (
+            Arc::new(Literal::new(ScalarValue::Utf8(Some("foo".to_owned())))) 
as _,
+            "const_2".to_owned(),
+        ),
+        (col("a", &schema_in).unwrap(), "a".to_owned()),
+    ];
+    let proj_exprs_2 = vec![
+        (
+            Arc::new(Literal::new(ScalarValue::Utf8(Some("foo".to_owned())))) 
as _,
+            "const_1".to_owned(),
+        ),
+        (
+            Arc::new(Literal::new(ScalarValue::Utf8(Some("bar".to_owned())))) 
as _,
+            "const_2".to_owned(),
+        ),
+        (col("a", &schema_in).unwrap(), "a".to_owned()),
+    ];
+
+    let source_1 = memory_exec(&schema_in);
+    let source_1 = projection_exec(proj_exprs_1.clone(), source_1).unwrap();
+    let schema_sources = source_1.schema();
+    let ordering_sources: LexOrdering =
+        [sort_expr("a", &schema_sources).nulls_last()].into();
+    let source_1 = sort_exec(ordering_sources.clone(), source_1);
+
+    let source_2 = memory_exec(&schema_in);
+    let source_2 = projection_exec(proj_exprs_2, source_2).unwrap();
+    let source_2 = sort_exec(ordering_sources.clone(), source_2);
+
+    let plan = union_exec(vec![source_1, source_2]);
+
+    let schema_out = plan.schema();
+    let ordering_out: LexOrdering = [
+        sort_expr("const_1", &schema_out).nulls_last(),
+        sort_expr("const_2", &schema_out).nulls_last(),
+        sort_expr("a", &schema_out).nulls_last(),
+    ]
+    .into();
+
+    let plan = sort_preserving_merge_exec(ordering_out, plan);
+
+    let plan_str = displayable(plan.as_ref()).indent(true).to_string();
+    let plan_str = plan_str.trim();
+    assert_snapshot!(
+        plan_str,
+        @r"
+    SortPreservingMergeExec: [const_1@0 ASC NULLS LAST, const_2@1 ASC NULLS 
LAST, a@2 ASC NULLS LAST]
+      UnionExec
+        SortExec: expr=[a@2 ASC NULLS LAST], preserve_partitioning=[false]
+          ProjectionExec: expr=[foo as const_1, foo as const_2, a@0 as a]
+            DataSourceExec: partitions=1, partition_sizes=[0]
+        SortExec: expr=[a@2 ASC NULLS LAST], preserve_partitioning=[false]
+          ProjectionExec: expr=[foo as const_1, bar as const_2, a@0 as a]
+            DataSourceExec: partitions=1, partition_sizes=[0]
+    "
+    );
+
+    assert_sanity_check(&plan, true);

Review Comment:
   
   I verified that this test covers the code in the PR by running it without 
the code changes and it fails like this
   
   ```
   assertion `left == right` failed
     left: false
    right: true
   
   Left:  false
   Right: true
   <Click to see difference>
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to