CuteChuanChuan commented on code in PR #19994:
URL: https://github.com/apache/datafusion/pull/19994#discussion_r2746696533


##########
datafusion/physical-expr/benches/case_when.rs:
##########
@@ -517,5 +519,106 @@ fn benchmark_lookup_table_case_when(c: &mut Criterion, 
batch_size: usize) {
     }
 }
 
+fn benchmark_divide_by_zero_protection(c: &mut Criterion, batch_size: usize) {
+    let mut group = c.benchmark_group("divide_by_zero_protection");
+
+    for zero_percentage in [0.0, 0.1, 0.5, 0.9] {
+        let rng = &mut seedable_rng();
+
+        let numerator: Int32Array =
+            (0..batch_size).map(|_| Some(rng.random::<i32>())).collect();
+
+        let divisor_values: Vec<Option<i32>> = (0..batch_size)
+            .map(|_| {
+                let roll: f32 = rng.random();
+                if roll < zero_percentage {
+                    Some(0)
+                } else {
+                    let mut val = rng.random::<i32>();
+                    while val == 0 {
+                        val = rng.random::<i32>();
+                    }
+                    Some(val)
+                }
+            })
+            .collect();
+
+        let divisor: Int32Array = divisor_values.iter().cloned().collect();
+        let divisor_copy: Int32Array = 
divisor_values.iter().cloned().collect();
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("numerator", numerator.data_type().clone(), true),
+            Field::new("divisor", divisor.data_type().clone(), true),
+            Field::new("divisor_copy", divisor_copy.data_type().clone(), true),
+        ]));
+
+        let batch = RecordBatch::try_new(
+            Arc::clone(&schema),
+            vec![
+                Arc::new(numerator),
+                Arc::new(divisor),
+                Arc::new(divisor_copy),
+            ],
+        )
+        .unwrap();
+
+        let numerator_col = col("numerator", &batch.schema()).unwrap();
+        let divisor_col = col("divisor", &batch.schema()).unwrap();
+        let divisor_copy_col = col("divisor_copy", &batch.schema()).unwrap();
+
+        group.bench_function(
+            format!(
+                "{} rows, {}% zeros: DivideByZeroProtection",
+                batch_size,
+                (zero_percentage * 100.0) as i32
+            ),
+            |b| {
+                let when = Arc::new(BinaryExpr::new(
+                    Arc::clone(&divisor_col),
+                    Operator::Gt,
+                    lit(0i32),
+                ));
+                let then = Arc::new(BinaryExpr::new(
+                    Arc::clone(&numerator_col),
+                    Operator::Divide,
+                    Arc::clone(&divisor_col),
+                ));
+                let else_null: Arc<dyn PhysicalExpr> = 
lit(ScalarValue::Int32(None));
+                let expr =
+                    Arc::new(case(None, vec![(when, then)], 
Some(else_null)).unwrap());
+
+                b.iter(|| black_box(expr.evaluate(black_box(&batch)).unwrap()))
+            },
+        );
+
+        group.bench_function(

Review Comment:
   Hi @pepijnve,
   The key difference is in which column is used in the **WHEN** condition:
   
   - First benchmark (`DivideByZeroProtection`): the WHEN condition checks 
**divisor_col** > 0, and divisor_col is also the divisor in 
`numerator / divisor_col`, so the expression matches the pattern and triggers 
the optimization.
   - Second benchmark (`ExpressionOrExpression`): the WHEN condition checks 
**divisor_copy_col** > 0, but the division still uses divisor_col. Because the 
checked column doesn't match the divisor, the optimization is not triggered 
and evaluation falls back to `ExpressionOrExpression`.
   
   I'll also add a comment in the code to make this distinction clearer.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to