alamb commented on code in PR #2764:
URL: https://github.com/apache/arrow-datafusion/pull/2764#discussion_r905386562


##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -160,6 +165,7 @@ macro_rules! make_contains_primitive {
                 ColumnarValue::Scalar(s) => match s {
                     ScalarValue::$SCALAR_VALUE(Some(v)) => Some(*v),
                     ScalarValue::$SCALAR_VALUE(None) => None,
+                    // TODO this is bug, for primitive the expr list should be 
cast to the same data type

Review Comment:
   Is it worth filing a follow on ticket to address? I am not 100% sure I 
understand what the end user facing bug would be (is it null is the wrong type?)



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -504,6 +634,11 @@ impl PhysicalExpr for InListExpr {
                         .unwrap();
                     set_contains_with_negated!(array, set, self.negated)
                 }
+                DataType::Decimal(_, _) => {
+                    let array = 
array.as_any().downcast_ref::<DecimalArray>().unwrap();

Review Comment:
   Stylistic suggestion: you can also use 
https://docs.rs/arrow/16.0.0/arrow/array/fn.as_decimal_array.html here too
   
   ```suggestion
                       let array = as_decimal_array(array);
   ```



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -300,6 +306,130 @@ fn cast_static_filter_to_set(list: &[Arc<dyn 
PhysicalExpr>]) -> HashSet<ScalarVa
     }))
 }
 
+fn make_list_contains_decimal(
+    array: &DecimalArray,
+    list: Vec<ColumnarValue>,
+    negated: bool,
+) -> BooleanArray {
+    let contains_null = list
+        .iter()
+        .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()));
+    let values = list
+        .iter()
+        .flat_map(|v| match v {
+            ColumnarValue::Scalar(s) => match s {
+                Decimal128(v128op, _, _) => v128op.map(|v128| v128),

Review Comment:
   I don't understand what this `map` does. Is this the same?
   
   ```suggestion
                   Decimal128(v128op, _, _) => *v128op,
   ```



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -640,13 +785,62 @@ impl PhysicalExpr for InListExpr {
     }
 }
 
+type InListCastResult = (Arc<dyn PhysicalExpr>, Vec<Arc<dyn PhysicalExpr>>);
+
 /// Creates a unary expression InList
 pub fn in_list(
     expr: Arc<dyn PhysicalExpr>,
     list: Vec<Arc<dyn PhysicalExpr>>,
     negated: &bool,
+    input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(InListExpr::new(expr, list, *negated)))
+    let (cast_expr, cast_list) = in_list_cast(expr, list, input_schema)?;
+    Ok(Arc::new(InListExpr::new(cast_expr, cast_list, *negated)))
+}
+
+fn in_list_cast(
+    expr: Arc<dyn PhysicalExpr>,
+    list: Vec<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
+) -> Result<InListCastResult> {
+    let expr_type = &expr.data_type(input_schema)?;
+    let list_types: Vec<DataType> = list
+        .iter()
+        .map(|list_expr| list_expr.data_type(input_schema).unwrap())
+        .collect();
+    // TODO in the arrow-rs, should support NULL type to Decimal Data type
+    // TODO support in the arrow-rs, NULL value cast to Decimal Value
+    let result_type = get_coerce_type(expr_type, &list_types);
+    match result_type {
+        None => Err(DataFusionError::Internal(format!(
+            "In expr can find the coerced type for {:?} in {:?}",
+            expr_type, list_types
+        ))),
+        Some(data_type) => {
+            // find the coerced type
+            let cast_expr = try_cast(expr, input_schema, data_type.clone())?;
+            let cast_list_expr = list
+                .into_iter()
+                .map(|list_expr| {
+                    try_cast(list_expr, input_schema, 
data_type.clone()).unwrap()
+                })
+                .collect();
+            Ok((cast_expr, cast_list_expr))
+        }
+    }
+}
+
+fn get_coerce_type(expr_type: &DataType, list_type: &[DataType]) -> 
Option<DataType> {
+    // get the equal coerced data type
+    list_type
+        .iter()
+        .fold(Some(expr_type.clone()), |left, right_type| {
+            match left {
+                None => None,
+                // TODO refactor a framework to do the data type coercion

Review Comment:
   👍  I agree it could be much nicer



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -300,6 +306,130 @@ fn cast_static_filter_to_set(list: &[Arc<dyn 
PhysicalExpr>]) -> HashSet<ScalarVa
     }))
 }
 
+fn make_list_contains_decimal(
+    array: &DecimalArray,
+    list: Vec<ColumnarValue>,
+    negated: bool,
+) -> BooleanArray {
+    let contains_null = list
+        .iter()
+        .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()));
+    let values = list
+        .iter()
+        .flat_map(|v| match v {
+            ColumnarValue::Scalar(s) => match s {
+                Decimal128(v128op, _, _) => v128op.map(|v128| v128),
+                _ => {
+                    unreachable!(
+                        "InList can't reach other data type for decimal data 
type."
+                    )
+                }
+            },
+            ColumnarValue::Array(_) => {
+                unimplemented!("InList does not yet support nested columns.")
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !negated {
+        // In
+        array
+            .iter()
+            .map(|v| match v {
+                None => None,
+                Some(v128) => {
+                    if values.contains(&v128) {
+                        Some(true)
+                    } else {
+                        Some(false)
+                    }
+                }
+            })
+            .collect::<BooleanArray>()
+    } else {
+        // Not in
+        if contains_null {
+            // if the expr is NOT IN and the list contains NULL value
+            // All the result must be NONE
+            vec![None; array.len()]

Review Comment:
   I think using `new_null_array` here might be both simpler and easier to read:
   
   https://docs.rs/arrow/16.0.0/arrow/array/fn.new_null_array.html



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -300,6 +306,130 @@ fn cast_static_filter_to_set(list: &[Arc<dyn 
PhysicalExpr>]) -> HashSet<ScalarVa
     }))
 }
 
+fn make_list_contains_decimal(
+    array: &DecimalArray,
+    list: Vec<ColumnarValue>,
+    negated: bool,
+) -> BooleanArray {
+    let contains_null = list
+        .iter()
+        .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()));
+    let values = list
+        .iter()
+        .flat_map(|v| match v {
+            ColumnarValue::Scalar(s) => match s {
+                Decimal128(v128op, _, _) => v128op.map(|v128| v128),
+                _ => {
+                    unreachable!(
+                        "InList can't reach other data type for decimal data 
type."
+                    )
+                }
+            },
+            ColumnarValue::Array(_) => {
+                unimplemented!("InList does not yet support nested columns.")
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !negated {
+        // In
+        array
+            .iter()
+            .map(|v| match v {
+                None => None,
+                Some(v128) => {
+                    if values.contains(&v128) {
+                        Some(true)
+                    } else {
+                        Some(false)
+                    }
+                }
+            })
+            .collect::<BooleanArray>()

Review Comment:
   ```suggestion
           array
               .iter()
               .map(|v| v.map(|v128| values.contains(&v128)))
               .collect::<BooleanArray>()
   ```
   Is a bit less verbose and perhaps easier to follow



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -300,6 +306,130 @@ fn cast_static_filter_to_set(list: &[Arc<dyn 
PhysicalExpr>]) -> HashSet<ScalarVa
     }))
 }
 
+fn make_list_contains_decimal(
+    array: &DecimalArray,
+    list: Vec<ColumnarValue>,
+    negated: bool,
+) -> BooleanArray {
+    let contains_null = list
+        .iter()
+        .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()));
+    let values = list
+        .iter()
+        .flat_map(|v| match v {
+            ColumnarValue::Scalar(s) => match s {
+                Decimal128(v128op, _, _) => v128op.map(|v128| v128),
+                _ => {
+                    unreachable!(
+                        "InList can't reach other data type for decimal data 
type."
+                    )
+                }
+            },
+            ColumnarValue::Array(_) => {
+                unimplemented!("InList does not yet support nested columns.")
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !negated {
+        // In
+        array
+            .iter()
+            .map(|v| match v {
+                None => None,
+                Some(v128) => {
+                    if values.contains(&v128) {
+                        Some(true)
+                    } else {
+                        Some(false)
+                    }
+                }
+            })
+            .collect::<BooleanArray>()
+    } else {
+        // Not in
+        if contains_null {
+            // if the expr is NOT IN and the list contains NULL value
+            // All the result must be NONE
+            vec![None; array.len()]
+                .into_iter()
+                .collect::<BooleanArray>()
+        } else {
+            array
+                .iter()
+                .map(|v| match v {
+                    None => None,
+                    Some(v128) => {
+                        if !values.contains(&v128) {
+                            Some(true)
+                        } else {
+                            Some(false)
+                        }
+                    }
+                })
+                .collect::<BooleanArray>()
+        }
+    }
+}
+
+fn make_set_contains_decimal(

Review Comment:
   The same comments apply here as do `make_list_contains_decimal` as they are 
largely copy/paste of each other. 



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -300,6 +306,130 @@ fn cast_static_filter_to_set(list: &[Arc<dyn 
PhysicalExpr>]) -> HashSet<ScalarVa
     }))
 }
 
+fn make_list_contains_decimal(
+    array: &DecimalArray,
+    list: Vec<ColumnarValue>,
+    negated: bool,
+) -> BooleanArray {
+    let contains_null = list
+        .iter()
+        .any(|v| matches!(v, ColumnarValue::Scalar(s) if s.is_null()));
+    let values = list
+        .iter()
+        .flat_map(|v| match v {
+            ColumnarValue::Scalar(s) => match s {
+                Decimal128(v128op, _, _) => v128op.map(|v128| v128),
+                _ => {
+                    unreachable!(
+                        "InList can't reach other data type for decimal data 
type."
+                    )
+                }
+            },
+            ColumnarValue::Array(_) => {
+                unimplemented!("InList does not yet support nested columns.")
+            }
+        })
+        .collect::<Vec<_>>();
+
+    if !negated {
+        // In
+        array
+            .iter()
+            .map(|v| match v {
+                None => None,
+                Some(v128) => {
+                    if values.contains(&v128) {
+                        Some(true)
+                    } else {
+                        Some(false)
+                    }
+                }
+            })
+            .collect::<BooleanArray>()

Review Comment:
   The same could be applied to the `NOT IN` branch



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -640,13 +785,62 @@ impl PhysicalExpr for InListExpr {
     }
 }
 
+type InListCastResult = (Arc<dyn PhysicalExpr>, Vec<Arc<dyn PhysicalExpr>>);
+
 /// Creates a unary expression InList
 pub fn in_list(
     expr: Arc<dyn PhysicalExpr>,
     list: Vec<Arc<dyn PhysicalExpr>>,
     negated: &bool,
+    input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(InListExpr::new(expr, list, *negated)))
+    let (cast_expr, cast_list) = in_list_cast(expr, list, input_schema)?;
+    Ok(Arc::new(InListExpr::new(cast_expr, cast_list, *negated)))
+}
+
+fn in_list_cast(
+    expr: Arc<dyn PhysicalExpr>,
+    list: Vec<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
+) -> Result<InListCastResult> {
+    let expr_type = &expr.data_type(input_schema)?;
+    let list_types: Vec<DataType> = list
+        .iter()
+        .map(|list_expr| list_expr.data_type(input_schema).unwrap())
+        .collect();
+    // TODO in the arrow-rs, should support NULL type to Decimal Data type
+    // TODO support in the arrow-rs, NULL value cast to Decimal Value
+    let result_type = get_coerce_type(expr_type, &list_types);
+    match result_type {
+        None => Err(DataFusionError::Internal(format!(
+            "In expr can find the coerced type for {:?} in {:?}",
+            expr_type, list_types
+        ))),
+        Some(data_type) => {
+            // find the coerced type
+            let cast_expr = try_cast(expr, input_schema, data_type.clone())?;
+            let cast_list_expr = list
+                .into_iter()
+                .map(|list_expr| {
+                    try_cast(list_expr, input_schema, 
data_type.clone()).unwrap()
+                })
+                .collect();
+            Ok((cast_expr, cast_list_expr))
+        }
+    }
+}
+
+fn get_coerce_type(expr_type: &DataType, list_type: &[DataType]) -> 
Option<DataType> {
+    // get the equal coerced data type
+    list_type
+        .iter()
+        .fold(Some(expr_type.clone()), |left, right_type| {
+            match left {
+                None => None,
+                // TODO refactor a framework to do the data type coercion

Review Comment:
   Maybe you could start by putting this code in 
https://github.com/apache/arrow-datafusion/blob/master/datafusion/physical-expr/src/type_coercion.rs
 or 
https://github.com/apache/arrow-datafusion/blob/master/datafusion/expr/src/type_coercion.rs
 🤔 



##########
datafusion/physical-expr/src/expressions/in_list.rs:
##########
@@ -640,13 +785,62 @@ impl PhysicalExpr for InListExpr {
     }
 }
 
+type InListCastResult = (Arc<dyn PhysicalExpr>, Vec<Arc<dyn PhysicalExpr>>);
+
 /// Creates a unary expression InList
 pub fn in_list(
     expr: Arc<dyn PhysicalExpr>,
     list: Vec<Arc<dyn PhysicalExpr>>,
     negated: &bool,
+    input_schema: &Schema,
 ) -> Result<Arc<dyn PhysicalExpr>> {
-    Ok(Arc::new(InListExpr::new(expr, list, *negated)))
+    let (cast_expr, cast_list) = in_list_cast(expr, list, input_schema)?;
+    Ok(Arc::new(InListExpr::new(cast_expr, cast_list, *negated)))
+}
+
+fn in_list_cast(
+    expr: Arc<dyn PhysicalExpr>,
+    list: Vec<Arc<dyn PhysicalExpr>>,
+    input_schema: &Schema,
+) -> Result<InListCastResult> {
+    let expr_type = &expr.data_type(input_schema)?;
+    let list_types: Vec<DataType> = list
+        .iter()
+        .map(|list_expr| list_expr.data_type(input_schema).unwrap())
+        .collect();
+    // TODO in the arrow-rs, should support NULL type to Decimal Data type
+    // TODO support in the arrow-rs, NULL value cast to Decimal Value
+    let result_type = get_coerce_type(expr_type, &list_types);

Review Comment:
   ```suggestion
       // https://github.com/apache/arrow-datafusion/issues/2759
       let result_type = get_coerce_type(expr_type, &list_types);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to