Blizzara commented on code in PR #12245:
URL: https://github.com/apache/datafusion/pull/12245#discussion_r1745444635


##########
datafusion/substrait/src/logical_plan/consumer.rs:
##########
@@ -850,6 +864,79 @@ pub async fn from_substrait_rel(
     }
 }
 
+/// Ensures that the given Substrait schema is compatible with the schema as 
given by DataFusion
+///
+/// This means:
+/// 1. All fields present in the Substrait schema are present in the 
DataFusion schema. The
+///    DataFusion schema may have MORE fields, but not the other way around.
+/// 2. All fields are compatible. See [`ensure_field_compatability`] for 
details
+///
+/// This function returns a DataFrame with fields adjusted if necessary in the 
event that the
+/// Substrait schema is a subset of the DataFusion schema.
+fn ensure_schema_compatability(
+    table: DataFrame,
+    substrait_schema: DFSchema,
+) -> Result<DataFrame> {
+    // For now strip the qualifiers from the DataFusion and Substrait schemas 
to make comparisons
+    // easier as there are issues around qualifier case.
+    // TODO: ensure that qualifiers are the same
+    let df_schema = table.schema().to_owned().strip_qualifiers();
+    if df_schema.logically_equivalent_names_and_types(&substrait_schema) {
+        return Ok(table);
+    }
+    let selected_columns = substrait_schema
+        .strip_qualifiers()
+        .fields()
+        .iter()
+        .map(|substrait_field| {
+            let df_field =
+                df_schema.field_with_unqualified_name(substrait_field.name())?;
+            ensure_field_compatability(df_field, substrait_field)?;
+            Ok(col(format!("\"{}\"", df_field.name())))
+        })
+        .collect::<Result<_>>()?;
+
+    table.select(selected_columns)
+}
+
+/// Ensures that the given Substrait field is compatible with the given 
DataFusion field
+///
+/// A field is compatible between Substrait and DataFusion if:
+/// 1. They have logically equivalent types.
+/// 2. They have the same nullability OR the Substrait field is nullable and 
the DataFusion fields
+///    is not nullable.
+///
+/// If a Substrait field is not nullable, the Substrait plan may be built 
around assuming it is not
+/// nullable. As such if DataFusion has that field as not nullable the plan 
should be rejected.
+fn ensure_field_compatability(
+    datafusion_field: &Field,
+    substrait_field: &Field,
+) -> Result<()> {
+    if DFSchema::datatype_is_logically_equal(
+        datafusion_field.data_type(),
+        substrait_field.data_type(),
+    ) {
+        if (datafusion_field.is_nullable() == substrait_field.is_nullable())
+            || (substrait_field.is_nullable() && 
!datafusion_field.is_nullable())
+        {
+            Ok(())
+        } else {
+            // TODO: from_substrait_struct_type needs to be updated to set the 
nullability correctly. It defaults to true for now.
+            substrait_err!(
+                "Field '{}' is nullable in the Substrait schema but not 
nullable in the DataFusion schema.",

Review Comment:
   Isn't this the other way around? Nullable in DF but not nullable in Substrait



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to