alamb commented on code in PR #10445:
URL: https://github.com/apache/datafusion/pull/10445#discussion_r1598903909


##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -188,37 +181,59 @@ impl LogicalPlanBuilder {
                     n_cols
                 );
             }
-            field_types = row
-                .iter()
-                .enumerate()
-                .map(|(j, expr)| {
-                    if let Expr::Literal(ScalarValue::Null) = expr {
-                        nulls.push((i, j));
-                        Ok(field_types[j].clone())
-                    } else {
-                        let data_type = expr.get_type(&empty_schema)?;
-                        if let Some(prev_data_type) = &field_types[j] {
-                            if prev_data_type != &data_type {
-                                return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_data_type} but found {data_type}")
-                            }
+        }
+
+        let empty_schema = DFSchema::empty();
+        let mut field_types: Vec<DataType> = Vec::with_capacity(n_cols);
+        for j in 0..n_cols {
+            let mut common_type: Option<DataType> = None;
+            for (i, row) in values.iter().enumerate() {
+                let value = &row[j];
+                let data_type = value.get_type(&empty_schema)?;
+                if data_type == DataType::Null {
+                    continue;
+                }
+                if let Some(prev_type) = common_type {
+                    // get common type of each column values.
+                    match values_coercion(&data_type, &prev_type) {
+                        Some(new_type) => {
+                            common_type = Some(new_type.clone());
+                        }
+                        None => {
+                            return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_type} but found {data_type}")
                         }
-                        Ok(Some(data_type))
                     }
-                })
-                .collect::<Result<Vec<Option<DataType>>>>()?;
+                } else {
+                    common_type = Some(data_type.clone());
+                }
+            }
+            field_types.push(common_type.unwrap_or(DataType::Utf8));
+        }
+        // wrap cast if data type is not same as common type.
+        for row in &mut values {
+            for (j, field_type) in field_types.iter().enumerate() {
+                if let Expr::Literal(ScalarValue::Null) = row[j] {
+                    row[j] = Expr::Literal(ScalarValue::try_from(field_type.clone())?);
+                } else {
+                    let data_type = row[j].get_type(&empty_schema)?;
+                    if data_type != *field_type {
+                        row[j] = Expr::Cast(Cast {
+                            expr: Box::new(row[j].clone()),
+                            data_type: field_type.clone(),
+                        });
+                    }
+                }

Review Comment:
   I think you can write this more concisely, like this:
   
   ```suggestion
                       row[j] = std::mem::take(&mut row[j]).cast_to(&field_type, &empty_schema)?;
   ```
   
   The `std::mem::take` moves the expression out of `row[j]` (leaving a default value in its place), so the expression does not need to be cloned.



##########
datafusion/expr/src/logical_plan/builder.rs:
##########
@@ -188,37 +181,59 @@ impl LogicalPlanBuilder {
                     n_cols
                 );
             }
-            field_types = row
-                .iter()
-                .enumerate()
-                .map(|(j, expr)| {
-                    if let Expr::Literal(ScalarValue::Null) = expr {
-                        nulls.push((i, j));
-                        Ok(field_types[j].clone())
-                    } else {
-                        let data_type = expr.get_type(&empty_schema)?;
-                        if let Some(prev_data_type) = &field_types[j] {
-                            if prev_data_type != &data_type {
-                                return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_data_type} but found {data_type}")
-                            }
+        }
+
+        let empty_schema = DFSchema::empty();
+        let mut field_types: Vec<DataType> = Vec::with_capacity(n_cols);
+        for j in 0..n_cols {
+            let mut common_type: Option<DataType> = None;
+            for (i, row) in values.iter().enumerate() {
+                let value = &row[j];
+                let data_type = value.get_type(&empty_schema)?;
+                if data_type == DataType::Null {
+                    continue;
+                }
+                if let Some(prev_type) = common_type {
+                    // get common type of each column values.
+                    match values_coercion(&data_type, &prev_type) {

Review Comment:
   Minor: I think you could write the same thing slightly more concisely, like this (and I don't think we need `new_type.clone()`):
   
   ```rust
                   if let Some(prev_type) = common_type {
                       // get common type of each column values.
                       let Some(new_type) = values_coercion(&data_type, &prev_type) else {
                           return plan_err!("Inconsistent data type across values list at row {i} column {j}. Was {prev_type} but found {data_type}");
                       };
                       common_type = Some(new_type);
                   } else {
                       common_type = Some(data_type.clone());
                   }
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to