alamb commented on code in PR #10445: URL: https://github.com/apache/datafusion/pull/10445#discussion_r1598903909
########## datafusion/expr/src/logical_plan/builder.rs: ########## @@ -188,37 +181,59 @@ impl LogicalPlanBuilder { n_cols ); } - field_types = row - .iter() - .enumerate() - .map(|(j, expr)| { - if let Expr::Literal(ScalarValue::Null) = expr { - nulls.push((i, j)); - Ok(field_types[j].clone()) - } else { - let data_type = expr.get_type(&empty_schema)?; - if let Some(prev_data_type) = &field_types[j] { - if prev_data_type != &data_type { - return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_data_type} but found {data_type}") - } + } + + let empty_schema = DFSchema::empty(); + let mut field_types: Vec<DataType> = Vec::with_capacity(n_cols); + for j in 0..n_cols { + let mut common_type: Option<DataType> = None; + for (i, row) in values.iter().enumerate() { + let value = &row[j]; + let data_type = value.get_type(&empty_schema)?; + if data_type == DataType::Null { + continue; + } + if let Some(prev_type) = common_type { + // get common type of each column values. + match values_coercion(&data_type, &prev_type) { + Some(new_type) => { + common_type = Some(new_type.clone()); + } + None => { + return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_type} but found {data_type}") } - Ok(Some(data_type)) } - }) - .collect::<Result<Vec<Option<DataType>>>>()?; + } else { + common_type = Some(data_type.clone()); + } + } + field_types.push(common_type.unwrap_or(DataType::Utf8)); + } + // wrap cast if data type is not same as common type. 
+ for row in &mut values { + for (j, field_type) in field_types.iter().enumerate() { + if let Expr::Literal(ScalarValue::Null) = row[j] { + row[j] = Expr::Literal(ScalarValue::try_from(field_type.clone())?); + } else { + let data_type = row[j].get_type(&empty_schema)?; + if data_type != *field_type { + row[j] = Expr::Cast(Cast { + expr: Box::new(row[j].clone()), + data_type: field_type.clone(), + }); + } + } Review Comment: I think you can write this more concisely, like this: ```suggestion row[j] = std::mem::take(&mut row[j]).cast_to(&field_type, &empty_schema)?; ``` The `std::mem::take` is needed to move the expression out of `row[j]` without cloning it. ########## datafusion/expr/src/logical_plan/builder.rs: ########## @@ -188,37 +181,59 @@ impl LogicalPlanBuilder { n_cols ); } - field_types = row - .iter() - .enumerate() - .map(|(j, expr)| { - if let Expr::Literal(ScalarValue::Null) = expr { - nulls.push((i, j)); - Ok(field_types[j].clone()) - } else { - let data_type = expr.get_type(&empty_schema)?; - if let Some(prev_data_type) = &field_types[j] { - if prev_data_type != &data_type { - return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_data_type} but found {data_type}") - } + } + + let empty_schema = DFSchema::empty(); + let mut field_types: Vec<DataType> = Vec::with_capacity(n_cols); + for j in 0..n_cols { + let mut common_type: Option<DataType> = None; + for (i, row) in values.iter().enumerate() { + let value = &row[j]; + let data_type = value.get_type(&empty_schema)?; + if data_type == DataType::Null { + continue; + } + if let Some(prev_type) = common_type { + // get common type of each column values. + match values_coercion(&data_type, &prev_type) { Review Comment: Minor: I think you could write the same thing slightly more concisely with `let ... else` (and I don't think we need `new_type.clone()`): ```rust if let Some(prev_type) = common_type { // get common type of each column values. 
let Some(new_type) = values_coercion(&data_type, &prev_type) else { return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_type} but found {data_type}"); }; common_type = Some(new_type); } else { common_type = Some(data_type.clone()); } ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org