alamb commented on code in PR #3859:
URL: https://github.com/apache/arrow-datafusion/pull/3859#discussion_r999839048


##########
datafusion/optimizer/src/utils.rs:
##########
@@ -663,4 +815,84 @@ mod tests {
             "mismatch rewriting expr_from: {expr_from} to {rewrite_to}"
         )
     }
+
+    #[test]
+    fn test_rewrite_cnf() {
+        let a_1 = col("a").eq(lit(1i64));
+        let a_2 = col("a").eq(lit(2i64));
+
+        let b_1 = col("b").eq(lit(1i64));
+        let b_2 = col("b").eq(lit(2i64));
+
+        // Test rewrite on a1_and_b2 and a2_and_b1 -> not change
+        let mut helper = CnfHelper::new();
+        let expr1 = and(a_1.clone(), b_2.clone());
+        let expect = expr1.clone();
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+
+        // Test rewrite on a1_and_b2 and a2_and_b1 -> (((a1 and b2) and a2) 
and b1)
+        let mut helper = CnfHelper::new();
+        let expr1 = and(and(a_1.clone(), b_2.clone()), and(a_2.clone(), 
b_1.clone()));
+        let expect = and(a_1.clone(), b_2.clone())
+            .and(a_2.clone())
+            .and(b_1.clone());
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+
+        // Test rewrite on a1_or_b2  -> not change
+        let mut helper = CnfHelper::new();
+        let expr1 = or(a_1.clone(), b_2.clone());
+        let expect = expr1.clone();
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+
+        // Test rewrite on a1_and_b2 or a2_and_b1 ->  a1_or_a2 and a1_or_b1 
and b2_or_a2 and b2_or_b1
+        let mut helper = CnfHelper::new();
+        let expr1 = or(and(a_1.clone(), b_2.clone()), and(a_2.clone(), 
b_1.clone()));
+        let a1_or_a2 = or(a_1.clone(), a_2.clone());
+        let a1_or_b1 = or(a_1.clone(), b_1.clone());
+        let b2_or_a2 = or(b_2.clone(), a_2.clone());
+        let b2_or_b1 = or(b_2.clone(), b_1.clone());
+        let expect = and(a1_or_a2, a1_or_b1).and(b2_or_a2).and(b2_or_b1);
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+
+        // Test rewrite on a1_or_b2 or a2_and_b1 ->  ( a1_or_a2 or a2 ) and 
(a1_or_a2 or b1)
+        let mut helper = CnfHelper::new();
+        let a1_or_b2 = or(a_1.clone(), b_2.clone());
+        let expr1 = or(or(a_1.clone(), b_2.clone()), and(a_2.clone(), 
b_1.clone()));
+        let expect = or(a1_or_b2.clone(), a_2.clone()).and(or(a1_or_b2, 
b_1.clone()));
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+
+        // Test rewrite on a1_or_b2 or a2_or_b1 ->  not change
+        let mut helper = CnfHelper::new();
+        let expr1 = or(or(a_1, b_2), or(a_2, b_1));
+        let expect = expr1.clone();
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+    }
+
+    #[test]
+    fn test_rewrite_cnf_overflow() {

Review Comment:
   ❤️ 



##########
datafusion/optimizer/src/utils.rs:
##########
@@ -663,4 +796,84 @@ mod tests {
             "mismatch rewriting expr_from: {expr_from} to {rewrite_to}"
         )
     }
+
+    #[test]
+    fn test_rewrite_cnf() {
+        let a_1 = col("a").eq(lit(1i64));
+        let a_2 = col("a").eq(lit(2i64));
+
+        let b_1 = col("b").eq(lit(1i64));
+        let b_2 = col("b").eq(lit(2i64));
+
+        // Test rewrite on a1_and_b2 and a2_and_b1 -> not change
+        let mut helper = CnfHelper::new();
+        let expr1 = and(a_1.clone(), b_2.clone());
+        let expect = expr1.clone();
+        let res = expr1.rewrite(&mut helper).unwrap();
+        assert_eq!(expect, res);
+
+        // Test rewrite on a1_and_b2 and a2_and_b1 -> (((a1 and b2) and a2) 
and b1)
+        let mut helper = CnfHelper::new();
+        let expr1 = and(and(a_1.clone(), b_2.clone()), and(a_2.clone(), 
b_1.clone()));
+        let expect = and(a_1.clone(), b_2.clone())

Review Comment:
   > This is not equal but they have the same string format:
   
   I agree this is not ideal



##########
datafusion/optimizer/src/filter_push_down.rs:
##########
@@ -952,6 +953,30 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn filter_keep_partial_agg() -> Result<()> {
+        let table_scan = test_table_scan()?;
+        let f1 = col("c").eq(lit(1i64)).and(col("b").gt(lit(2i64)));
+        let f2 = col("c").eq(lit(1i64)).and(col("b").gt(lit(3i64)));
+        let filter = f1.or(f2);
+        let plan = LogicalPlanBuilder::from(table_scan)
+            .aggregate(vec![col("a")], vec![sum(col("b")).alias("b")])?
+            .filter(filter)?
+            .build()?;
+        // filter of aggregate is after aggregation since they are 
non-commutative
+        // (c =1 AND b > 2) OR (c = 1 AND b > 3)
+        // rewrite to CNF
+        // (c = 1 OR c = 1) [can pushDown] AND (c = 1 OR b > 3) AND (b > 2 OR 
C = 1) AND (b > 2 OR b > 3)
+
+        let expected = "\
+            Filter: test.c = Int64(1) OR b > Int64(3) AND b > Int64(2) OR 
test.c = Int64(1) AND b > Int64(2) OR b > Int64(3)\
+            \n  Aggregate: groupBy=[[test.a]], aggr=[[SUM(test.b) AS b]]\
+            \n    Filter: test.c = Int64(1) OR test.c = Int64(1)\

Review Comment:
   Filed https://github.com/apache/arrow-datafusion/issues/3895



##########
datafusion/optimizer/src/utils.rs:
##########
@@ -96,30 +96,173 @@ fn split_conjunction_impl<'a>(expr: &'a Expr, mut exprs: 
Vec<&'a Expr>) -> Vec<&
 /// ];
 ///
 /// // use split_conjunction_owned to split them
-/// assert_eq!(split_conjunction_owned(expr), split);
+/// assert_eq!(split_conjunction_owned(expr, Operator::And), split);
 /// ```
-pub fn split_conjunction_owned(expr: Expr) -> Vec<Expr> {
-    split_conjunction_owned_impl(expr, vec![])
+pub fn split_conjunction_owned(expr: Expr, op: Operator) -> Vec<Expr> {

Review Comment:
   I agree this is useful -- though I would prefer this function to remain 
named `split_conjunction` (as the `conjunction` implies `AND` and instead have 
a new function called `split_operator` or something. Maybe I can make a quick PR



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to