rluvaton commented on code in PR #18152:
URL: https://github.com/apache/datafusion/pull/18152#discussion_r2466230898
##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -283,70 +717,81 @@ impl CaseExpr {
/// END
fn case_when_no_expr(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
let return_type = self.data_type(&batch.schema())?;
+ let mut result_builder = ResultBuilder::new(&return_type,
batch.num_rows());
- // start with nulls as default output
- let mut current_value = new_null_array(&return_type, batch.num_rows());
- let mut remainder = BooleanArray::from(vec![true; batch.num_rows()]);
- let mut remainder_count = batch.num_rows();
- for i in 0..self.when_then_expr.len() {
- // If there are no rows left to process, break out of the loop
early
- if remainder_count == 0 {
- break;
- }
+ // `remainder_rows` contains the indices of the rows that need to be
evaluated
+ let mut remainder_rows: ArrayRef =
+ Arc::new(UInt32Array::from_iter(0..batch.num_rows() as u32));
+ // `remainder_batch` contains the rows themselves that need to be
evaluated
+ let mut remainder_batch = Cow::Borrowed(batch);
+ for i in 0..self.when_then_expr.len() {
+ // Evaluate the 'when' predicate for the remainder batch
+ // This results in a boolean array with the same length as the
remaining number of rows
let when_predicate = &self.when_then_expr[i].0;
- let when_value = when_predicate.evaluate_selection(batch,
&remainder)?;
- let when_value = when_value.into_array(batch.num_rows())?;
+ let when_value = when_predicate
+ .evaluate(&remainder_batch)?
+ .into_array(remainder_batch.num_rows())?;
let when_value = as_boolean_array(&when_value).map_err(|_| {
internal_datafusion_err!("WHEN expression did not return a
BooleanArray")
})?;
- // Treat 'NULL' as false value
- let when_value = match when_value.null_count() {
- 0 => Cow::Borrowed(when_value),
- _ => Cow::Owned(prep_null_mask_filter(when_value)),
- };
- // Make sure we only consider rows that have not been matched yet
- let when_value = and(&when_value, &remainder)?;
- // If the predicate did not match any rows, continue to the next
branch immediately
let when_match_count = when_value.true_count();
+
+ // If the 'when' predicate did not match any rows, continue to the
next branch immediately
if when_match_count == 0 {
continue;
}
- let then_expression = &self.when_then_expr[i].1;
- let then_value = then_expression.evaluate_selection(batch,
&when_value)?;
+ // If the 'when' predicate matched all remaining rows, there is no
need to filter
+ if when_match_count == remainder_batch.num_rows() {
+ let then_expression = &self.when_then_expr[i].1;
+ let then_value = then_expression.evaluate(&remainder_batch)?;
+ result_builder.add_branch_result(&remainder_rows, then_value)?;
+ return result_builder.finish();
+ }
- current_value = match then_value {
- ColumnarValue::Scalar(ScalarValue::Null) => {
- nullif(current_value.as_ref(), &when_value)?
- }
- ColumnarValue::Scalar(then_value) => {
- zip(&when_value, &then_value.to_scalar()?, &current_value)?
- }
- ColumnarValue::Array(then_value) => {
- zip(&when_value, &then_value, &current_value)?
- }
+ // Make sure 'NULL' is treated as false
+ let when_value = match when_value.null_count() {
+ 0 => Cow::Borrowed(when_value),
+ _ => Cow::Owned(prep_null_mask_filter(when_value)),
};
- // Succeed tuples should be filtered out for short-circuit
evaluation,
- // null values for the current when expr should be kept
- remainder = and_not(&remainder, &when_value)?;
- remainder_count -= when_match_count;
+ // Filter the remainder batch based on the 'when' value
+ // This results in a batch containing only the rows that need to
be evaluated
+ // for the current branch
+ let then_filter = create_filter(&when_value);
+ let then_batch = filter_record_batch(&remainder_batch,
&then_filter)?;
+ let then_rows = filter_array(&remainder_rows, &then_filter)?;
+
+ let then_expression = &self.when_then_expr[i].1;
+ let then_value = then_expression.evaluate(&then_batch)?;
+ result_builder.add_branch_result(&then_rows, then_value)?;
+
+ // If this is the last 'when' branch and there is no 'else'
expression, there's no
+ // point in calculating the remaining rows.
+ if i == self.when_then_expr.len() - 1 && self.else_expr.is_none() {
+ return result_builder.finish();
+ }
Review Comment:
```suggestion
// If this is the last 'when' branch and there is no 'else' expression, there's no
// point in calculating the remaining rows.
if i == self.when_then_expr.len() - 1 && self.else_expr.is_none() {
break;
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]