vbarua commented on code in PR #13931: URL: https://github.com/apache/datafusion/pull/13931#discussion_r1899707055
########## datafusion/substrait/src/logical_plan/producer.rs: ########## @@ -998,450 +1304,418 @@ pub fn make_binary_op_scalar_func( /// Convert DataFusion Expr to Substrait Rex /// /// # Arguments -/// -/// * `expr` - DataFusion expression to be parse into a Substrait expression -/// * `schema` - DataFusion input schema for looking up field qualifiers -/// * `col_ref_offset` - Offset for calculating Substrait field reference indices. -/// This should only be set by caller with more than one input relations i.e. Join. -/// Substrait expects one set of indices when joining two relations. -/// Let's say `left` and `right` have `m` and `n` columns, respectively. The `right` -/// relation will have column indices from `0` to `n-1`, however, Substrait will expect -/// the `right` indices to be offset by the `left`. This means Substrait will expect to -/// evaluate the join condition expression on indices [0 .. n-1, n .. n+m-1]. For example: -/// ```SELECT * -/// FROM t1 -/// JOIN t2 -/// ON t1.c1 = t2.c0;``` -/// where t1 consists of columns [c0, c1, c2], and t2 = columns [c0, c1] -/// the join condition should become -/// `col_ref(1) = col_ref(3 + 0)` -/// , where `3` is the number of `left` columns (`col_ref_offset`) and `0` is the index -/// of the join key column from `right` -/// * `extensions` - Substrait extension info. Contains registered function information -#[allow(deprecated)] +/// * `producer` - SubstraitProducer implementation which the handles the actual conversion +/// * `expr` - DataFusion expression to convert into a Substrait expression +/// * `schema` - DataFusion input schema for looking up columns pub fn to_substrait_rex( - state: &dyn SubstraitPlanningState, + producer: &mut impl SubstraitProducer, expr: &Expr, schema: &DFSchemaRef, - col_ref_offset: usize, - extensions: &mut Extensions, ) -> Result<Expression> { match expr { - Expr::InList(InList { - expr, - list, - negated, - }) => { - let substrait_list = list - .iter() - .map(|x| to_substrait_rex(state, x, schema, col_ref_offset, extensions)) - .collect::<Result<Vec<Expression>>>()?; - let substrait_expr = - to_substrait_rex(state, expr, schema, col_ref_offset, extensions)?; - - let substrait_or_list = Expression { - rex_type: Some(RexType::SingularOrList(Box::new(SingularOrList { - value: Some(Box::new(substrait_expr)), - options: substrait_list, - }))), - }; - - if *negated { - let function_anchor = extensions.register_function("not".to_string()); - - Ok(Expression { - rex_type: Some(RexType::ScalarFunction(ScalarFunction { - function_reference: function_anchor, - arguments: vec![FunctionArgument { - arg_type: Some(ArgType::Value(substrait_or_list)), - }], - output_type: None, - args: vec![], - options: vec![], - })), - }) - } else { - Ok(substrait_or_list) - } + Expr::Alias(expr) => producer.consume_alias(expr, schema), + Expr::Column(expr) => producer.consume_column(expr, schema), + Expr::Literal(expr) => producer.consume_literal(expr), + Expr::BinaryExpr(expr) => producer.consume_binary_expr(expr, schema), + Expr::Like(expr) => producer.consume_like(expr, schema), + Expr::SimilarTo(_) => not_impl_err!("SimilarTo is not supported"), + Expr::Not(_) => producer.consume_unary_expr(expr, schema), + Expr::IsNotNull(_) => producer.consume_unary_expr(expr, schema), + Expr::IsNull(_) => producer.consume_unary_expr(expr, schema), + Expr::IsTrue(_) => producer.consume_unary_expr(expr, schema), + Expr::IsFalse(_) => producer.consume_unary_expr(expr, schema), + Expr::IsUnknown(_) => producer.consume_unary_expr(expr, schema), + Expr::IsNotTrue(_) => producer.consume_unary_expr(expr, schema), + Expr::IsNotFalse(_) => producer.consume_unary_expr(expr, schema), + Expr::IsNotUnknown(_) => producer.consume_unary_expr(expr, schema), + Expr::Negative(_) => producer.consume_unary_expr(expr, schema), + Expr::Between(expr) => producer.consume_between(expr, schema), + Expr::Case(expr) => producer.consume_case(expr, schema), + Expr::Cast(expr) => producer.consume_cast(expr, schema), + Expr::TryCast(expr) => producer.consume_try_cast(expr, schema), + Expr::ScalarFunction(expr) => producer.consume_scalar_function(expr, schema), + Expr::AggregateFunction(_) => { + internal_err!( + "AggregateFunction should only be encountered as part of a LogicalPlan::Aggregate" + ) } - Expr::ScalarFunction(fun) => { - let mut arguments: Vec<FunctionArgument> = vec![]; - for arg in &fun.args { - arguments.push(FunctionArgument { - arg_type: Some(ArgType::Value(to_substrait_rex( - state, - arg, - schema, - col_ref_offset, - extensions, - )?)), - }); - } + Expr::WindowFunction(expr) => producer.consume_window_function(expr, schema), + Expr::InList(expr) => producer.consume_in_list(expr, schema), + Expr::InSubquery(expr) => producer.consume_in_subquery(expr, schema), + _ => not_impl_err!("Cannot convert {expr:?} to Substrait"), Review Comment: Easy enough to expand out. Updated. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org