alamb commented on code in PR #13387: URL: https://github.com/apache/datafusion/pull/13387#discussion_r1840561482
########## datafusion-examples/examples/expr_api.rs: ########## @@ -316,3 +322,105 @@ fn expression_type_demo() -> Result<()> { Ok(()) } + +/// This function demonstrates how to apply type coercion to expressions, such as binary expressions. +/// +/// In most cases, manual type coercion is not required since DataFusion handles it implicitly. +/// However, certain projects may construct `ExecutionPlan`s directly from DataFusion logical expressions, +/// bypassing the construction of DataFusion logical plans. +/// Since constructing `ExecutionPlan`s from logical expressions does not automatically apply type coercion, +/// you may need to handle type coercion manually in these cases. Review Comment: ❤️ ########## datafusion-examples/examples/expr_api.rs: ########## @@ -316,3 +322,105 @@ fn expression_type_demo() -> Result<()> { Ok(()) } + +/// This function demonstrates how to apply type coercion to expressions, such as binary expressions. +/// +/// In most cases, manual type coercion is not required since DataFusion handles it implicitly. +/// However, certain projects may construct `ExecutionPlan`s directly from DataFusion logical expressions, +/// bypassing the construction of DataFusion logical plans. +/// Since constructing `ExecutionPlan`s from logical expressions does not automatically apply type coercion, +/// you may need to handle type coercion manually in these cases. +/// +/// The codes in this function shows various ways to perform type coercion on expressions: +/// 1. Using `SessionContext::create_physical_expr` +/// 2. Using `ExprSimplifier::coerce` +/// 3. Using `TreeNodeRewriter::rewrite` based on `TypeCoercionRewriter` +/// 4. Using `TreeNode::transform` +/// +/// Note, this list cannot be complete and there may have other methods to apply type coercion to expressions. +fn type_coercion_demo() -> Result<()> { + // Creates a record batch for demo. 
+ let df_schema = DFSchema::from_unqualified_fields( + vec![Field::new("a", DataType::Int8, false)].into(), + HashMap::new(), + )?; + let i8_array = Int8Array::from_iter_values(vec![0, 1, 2]); + let batch = RecordBatch::try_new( + Arc::new(df_schema.as_arrow().to_owned()), + vec![Arc::new(i8_array) as _], + )?; + + // Constructs a binary expression for demo. + // By default, the literal `1` is translated into the Int32 type and cannot be directly compared with the Int8 type. + let expr = col("a").gt(lit(1)); + + // Evaluation with an expression that has not been type coerced cannot succeed. + let props = ExecutionProps::default(); + let physical_expr = + datafusion_physical_expr::create_physical_expr(&expr, &df_schema, &props)?; + let Err(e) = physical_expr.evaluate(&batch) else { + unreachable!() + }; Review Comment: You can also write this using `unwrap_err`, which is a bit more concise: ```suggestion let e = physical_expr.evaluate(&batch).unwrap_err(); ``` ########## datafusion-examples/examples/expr_api.rs: ########## @@ -316,3 +322,105 @@ fn expression_type_demo() -> Result<()> { Ok(()) } + +/// This function demonstrates how to apply type coercion to expressions, such as binary expressions. +/// +/// In most cases, manual type coercion is not required since DataFusion handles it implicitly. +/// However, certain projects may construct `ExecutionPlan`s directly from DataFusion logical expressions, +/// bypassing the construction of DataFusion logical plans. +/// Since constructing `ExecutionPlan`s from logical expressions does not automatically apply type coercion, +/// you may need to handle type coercion manually in these cases. +/// +/// The codes in this function shows various ways to perform type coercion on expressions: +/// 1. Using `SessionContext::create_physical_expr` +/// 2. Using `ExprSimplifier::coerce` +/// 3. Using `TreeNodeRewriter::rewrite` based on `TypeCoercionRewriter` +/// 4. 
Using `TreeNode::transform` +/// +/// Note, this list cannot be complete and there may have other methods to apply type coercion to expressions. Review Comment: ```suggestion /// Note, this list may not be complete and there may be other methods to apply type coercion to expressions. ``` ########## datafusion-examples/examples/expr_api.rs: ########## @@ -316,3 +322,105 @@ fn expression_type_demo() -> Result<()> { Ok(()) } + +/// This function demonstrates how to apply type coercion to expressions, such as binary expressions. +/// +/// In most cases, manual type coercion is not required since DataFusion handles it implicitly. +/// However, certain projects may construct `ExecutionPlan`s directly from DataFusion logical expressions, +/// bypassing the construction of DataFusion logical plans. +/// Since constructing `ExecutionPlan`s from logical expressions does not automatically apply type coercion, +/// you may need to handle type coercion manually in these cases. +/// +/// The codes in this function shows various ways to perform type coercion on expressions: +/// 1. Using `SessionContext::create_physical_expr` +/// 2. Using `ExprSimplifier::coerce` +/// 3. Using `TreeNodeRewriter::rewrite` based on `TypeCoercionRewriter` +/// 4. Using `TreeNode::transform` +/// +/// Note, this list cannot be complete and there may have other methods to apply type coercion to expressions. +fn type_coercion_demo() -> Result<()> { + // Creates a record batch for demo. + let df_schema = DFSchema::from_unqualified_fields( + vec![Field::new("a", DataType::Int8, false)].into(), + HashMap::new(), + )?; + let i8_array = Int8Array::from_iter_values(vec![0, 1, 2]); + let batch = RecordBatch::try_new( + Arc::new(df_schema.as_arrow().to_owned()), + vec![Arc::new(i8_array) as _], + )?; + + // Constructs a binary expression for demo. + // By default, the literal `1` is translated into the Int32 type and cannot be directly compared with the Int8 type. 
+ let expr = col("a").gt(lit(1)); + + // Evaluation with an expression that has not been type coerced cannot succeed. + let props = ExecutionProps::default(); + let physical_expr = + datafusion_physical_expr::create_physical_expr(&expr, &df_schema, &props)?; + let Err(e) = physical_expr.evaluate(&batch) else { + unreachable!() + }; + assert!(e + .find_root() + .to_string() + .contains("Invalid comparison operation: Int8 > Int32")); + + // 1. Type coercion with `SessionContext::create_physical_expr` which implicitly applies type coercion before constructing the physical expr. + let physical_expr = + SessionContext::new().create_physical_expr(expr.clone(), &df_schema)?; + assert!(physical_expr.evaluate(&batch).is_ok()); + + // 2. Type coercion with `ExprSimplifier::coerce`. + let context = SimplifyContext::new(&props).with_schema(Arc::new(df_schema.clone())); + let simplifier = ExprSimplifier::new(context); + let coerced_expr = simplifier.coerce(expr.clone(), &df_schema)?; + let physical_expr = datafusion_physical_expr::create_physical_expr( + &coerced_expr, + &df_schema, + &props, + )?; + assert!(physical_expr.evaluate(&batch).is_ok()); + + // 3. Type coercion with `TypeCoercionRewriter`. + let coerced_expr = expr + .clone() + .rewrite(&mut TypeCoercionRewriter::new(&df_schema))? + .data; + let physical_expr = datafusion_physical_expr::create_physical_expr( + &coerced_expr, + &df_schema, + &props, + )?; + assert!(physical_expr.evaluate(&batch).is_ok()); + + // 4. Type coercion with manual transformation. Review Comment: ```suggestion // 4. Apply explicit type coercion by manually rewriting the expression ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org