pepijnve commented on code in PR #18329:
URL: https://github.com/apache/datafusion/pull/18329#discussion_r2475600474


##########
datafusion/physical-expr/src/expressions/case.rs:
##########
@@ -845,6 +935,79 @@ impl CaseExpr {
         result_builder.finish()
     }
 
+    /// See [CaseExpr::expr_or_expr].
+    fn expr_or_expr(
+        &self,
+        batch: &RecordBatch,
+        when_value: &BooleanArray,
+        return_type: &DataType,
+    ) -> Result<ColumnarValue> {
+        let then_value = self.when_then_expr[0]
+            .1
+            .evaluate_selection(batch, when_value)?
+            .into_array(batch.num_rows())?;
+
+        // evaluate else expression on the values not covered by when_value
+        let remainder = not(when_value)?;
+        let e = self.else_expr.as_ref().unwrap();
+        // keep `else_expr`'s data type and return type consistent
+        let expr = try_cast(Arc::clone(e), &batch.schema(), 
return_type.clone())
+            .unwrap_or_else(|_| Arc::clone(e));
+        let else_ = expr
+            .evaluate_selection(batch, &remainder)?
+            .into_array(batch.num_rows())?;
+
+        Ok(ColumnarValue::Array(zip(&remainder, &else_, &then_value)?))
+    }
+}
+
+impl CaseExpr {
+    /// This function evaluates the form of CASE that matches an expression to 
fixed values.
+    ///
+    /// CASE expression
+    ///     WHEN value THEN result
+    ///     [WHEN ...]
+    ///     [ELSE result]
+    /// END
+    fn case_when_with_expr(
+        &self,
+        batch: &RecordBatch,
+        projected: &ProjectedCaseBody,
+    ) -> Result<ColumnarValue> {
+        let return_type = self.data_type(&batch.schema())?;
+        if projected.projection.len() < batch.num_columns() {

Review Comment:
   > when would projection.len() be greater?
   
   It will never be greater (or at least it shouldn't be), but it may be equal 
when all of the input columns are actually used.
   
   The reason this is necessary in the first place is that, at construction 
time of the CaseExpr, you're flying blind with respect to the schema. If the 
set of used columns is, for instance, `0, 1, 2`, there's no way to know whether 
that's all of them or just a prefix of the full schema. Unfortunately, that 
necessitates performing this check on every evaluation.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to