paleolimbot commented on code in PR #17986:
URL: https://github.com/apache/datafusion/pull/17986#discussion_r2437639773


##########
datafusion/common/src/metadata.rs:
##########
@@ -0,0 +1,363 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{collections::BTreeMap, sync::Arc};
+
+use arrow::datatypes::{DataType, Field};
+use hashbrown::HashMap;
+
+use crate::{error::_plan_err, DataFusionError, ScalarValue};
+
+/// A [`ScalarValue`] with optional [`FieldMetadata`]
+#[derive(Debug, Clone)]
+pub struct Literal {
+    pub value: ScalarValue,
+    pub metadata: Option<FieldMetadata>,
+}
+
+impl Literal {
+    /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
+    pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
+        Self { value, metadata }
+    }

Review Comment:
   I am not sure `Literal` is the best name here (we already have the trait 
`datafusion_expr::Literal` and `datafusion_physical_expr::Literal`), but 
`ScalarValueAndMetadata` seemed verbose, and the pair `(ScalarValue, 
Option<FieldMetadata>)` shows up in enough places that I think having its own 
struct makes sense.
   
   Other places where we pair a `ScalarValue` with metadata are `Expr::Literal` 
and `ConstSimplifyResult::Simplified`.
   
   I don't have strong opinions here and am happy to have this be whatever.
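   
   For illustration, a minimal usage sketch of the struct as proposed (the 
import paths and the `FieldMetadata::new(BTreeMap)` constructor are my 
assumptions about where things land after the move into `datafusion_common`):
   
   ```rust
   use std::collections::BTreeMap;
   
   use datafusion_common::metadata::{FieldMetadata, Literal};
   use datafusion_common::ScalarValue;
   
   fn main() {
       // Hypothetical extension-type metadata attached to a plain string value.
       let mut md = BTreeMap::new();
       md.insert("ARROW:extension:name".to_string(), "arrow.json".to_string());
   
       let lit = Literal::new(
           ScalarValue::Utf8(Some("{}".to_string())),
           Some(FieldMetadata::new(md)),
       );
   
       // Recover the tuple form used at existing call sites.
       let (value, metadata) = lit.into_inner();
       assert_eq!(value, ScalarValue::Utf8(Some("{}".to_string())));
       assert!(metadata.is_some());
   }
   ```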



##########
datafusion/common/src/metadata.rs:
##########
@@ -0,0 +1,363 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{collections::BTreeMap, sync::Arc};
+
+use arrow::datatypes::{DataType, Field};
+use hashbrown::HashMap;
+
+use crate::{error::_plan_err, DataFusionError, ScalarValue};
+
+/// A [`ScalarValue`] with optional [`FieldMetadata`]
+#[derive(Debug, Clone)]
+pub struct Literal {
+    pub value: ScalarValue,
+    pub metadata: Option<FieldMetadata>,
+}
+
+impl Literal {
+    /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
+    pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
+        Self { value, metadata }
+    }
+
+    /// Access the underlying [ScalarValue] storage
+    pub fn value(&self) -> &ScalarValue {
+        &self.value
+    }
+
+    /// Access the [FieldMetadata] attached to this value, if any
+    pub fn metadata(&self) -> Option<&FieldMetadata> {
+        self.metadata.as_ref()
+    }
+
+    /// Consume self and return components
+    pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
+        (self.value, self.metadata)
+    }
+
+    /// Cast this literal's storage type
+    ///
+    /// This operation assumes that if the underlying [ScalarValue] can be
+    /// cast to a given type, any extension type represented by the metadata
+    /// is also valid.
+    pub fn cast_storage_to(
+        &self,
+        target_type: &DataType,
+    ) -> Result<Self, DataFusionError> {
+        let new_value = self.value().cast_to(target_type)?;
+        Ok(Literal::new(new_value, self.metadata.clone()))
+    }
+}
+
+/// Assert equality of data types where one or both sides may have field metadata
+///
+/// This currently compares absent metadata (e.g., one side was a DataType) and
+/// empty metadata (e.g., one side was a field where the field had no metadata)
+/// as equal and uses byte-for-byte comparison for the keys and values of the
+/// fields, even though this is potentially too strict for some cases (e.g.,
+/// extension types where extension metadata is represented by JSON, or cases
+/// where field metadata is orthogonal to the interpretation of the data type).
+///
+/// Returns a planning error with suitably formatted type representations if
+/// actual and expected do not compare equal.
+pub fn check_metadata_with_storage_equal(
+    actual: (
+        &DataType,
+        Option<&std::collections::HashMap<String, String>>,
+    ),
+    expected: (
+        &DataType,
+        Option<&std::collections::HashMap<String, String>>,
+    ),
+    what: &str,
+    context: &str,
+) -> Result<(), DataFusionError> {

Review Comment:
   This is kind of ugly, but there are two places in this PR that perform this 
check (type equality). I figured that collecting the checks in one place would 
at least make it easier to do something smarter later if it comes up.
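   
   For reference, a sketch of what a call site looks like (module path assumed 
from the file location; the mismatch here is non-empty expected metadata 
against none on the actual side, which the match above treats as unequal):
   
   ```rust
   use std::collections::HashMap;
   
   use arrow::datatypes::DataType;
   use datafusion_common::metadata::check_metadata_with_storage_equal;
   use datafusion_common::Result;
   
   fn check_parameter() -> Result<()> {
       let expected_md: HashMap<String, String> = HashMap::from([(
           "ARROW:extension:name".to_string(),
           "arrow.json".to_string(),
       )]);
   
       // Storage types match, but the actual side has no metadata while the
       // expected side has non-empty metadata, so this returns a plan error
       // formatted using `what` and `context`.
       check_metadata_with_storage_equal(
           (&DataType::Utf8, None),
           (&DataType::Utf8, Some(&expected_md)),
           "param",
           " for prepared statement",
       )
   }
   ```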



##########
datafusion/common/src/metadata.rs:
##########
@@ -0,0 +1,363 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::{collections::BTreeMap, sync::Arc};
+
+use arrow::datatypes::{DataType, Field};
+use hashbrown::HashMap;
+
+use crate::{error::_plan_err, DataFusionError, ScalarValue};
+
+/// A [`ScalarValue`] with optional [`FieldMetadata`]
+#[derive(Debug, Clone)]
+pub struct Literal {
+    pub value: ScalarValue,
+    pub metadata: Option<FieldMetadata>,
+}
+
+impl Literal {
+    /// Create a new Literal from a scalar value with optional [`FieldMetadata`]
+    pub fn new(value: ScalarValue, metadata: Option<FieldMetadata>) -> Self {
+        Self { value, metadata }
+    }
+
+    /// Access the underlying [ScalarValue] storage
+    pub fn value(&self) -> &ScalarValue {
+        &self.value
+    }
+
+    /// Access the [FieldMetadata] attached to this value, if any
+    pub fn metadata(&self) -> Option<&FieldMetadata> {
+        self.metadata.as_ref()
+    }
+
+    /// Consume self and return components
+    pub fn into_inner(self) -> (ScalarValue, Option<FieldMetadata>) {
+        (self.value, self.metadata)
+    }
+
+    /// Cast this literal's storage type
+    ///
+    /// This operation assumes that if the underlying [ScalarValue] can be
+    /// cast to a given type, any extension type represented by the metadata
+    /// is also valid.
+    pub fn cast_storage_to(
+        &self,
+        target_type: &DataType,
+    ) -> Result<Self, DataFusionError> {
+        let new_value = self.value().cast_to(target_type)?;
+        Ok(Literal::new(new_value, self.metadata.clone()))
+    }
+}
+
+/// Assert equality of data types where one or both sides may have field metadata
+///
+/// This currently compares absent metadata (e.g., one side was a DataType) and
+/// empty metadata (e.g., one side was a field where the field had no metadata)
+/// as equal and uses byte-for-byte comparison for the keys and values of the
+/// fields, even though this is potentially too strict for some cases (e.g.,
+/// extension types where extension metadata is represented by JSON, or cases
+/// where field metadata is orthogonal to the interpretation of the data type).
+///
+/// Returns a planning error with suitably formatted type representations if
+/// actual and expected do not compare equal.
+pub fn check_metadata_with_storage_equal(
+    actual: (
+        &DataType,
+        Option<&std::collections::HashMap<String, String>>,
+    ),
+    expected: (
+        &DataType,
+        Option<&std::collections::HashMap<String, String>>,
+    ),
+    what: &str,
+    context: &str,
+) -> Result<(), DataFusionError> {
+    if actual.0 != expected.0 {
+        return _plan_err!(
+            "Expected {what} of type {}, got {}{context}",
+            format_type_and_metadata(expected.0, expected.1),
+            format_type_and_metadata(actual.0, actual.1)
+        );
+    }
+
+    let metadata_equal = match (actual.1, expected.1) {
+        (None, None) => true,
+        (None, Some(expected_metadata)) => expected_metadata.is_empty(),
+        (Some(actual_metadata), None) => actual_metadata.is_empty(),
+        (Some(actual_metadata), Some(expected_metadata)) => {
+            actual_metadata == expected_metadata
+        }
+    };
+
+    if !metadata_equal {
+        return _plan_err!(
+            "Expected {what} of type {}, got {}{context}",
+            format_type_and_metadata(expected.0, expected.1),
+            format_type_and_metadata(actual.0, actual.1)
+        );
+    }
+
+    Ok(())
+}
+
+/// Given a data type represented by storage and optional metadata, generate
+/// a user-facing string
+///
+/// This function exists to reduce the number of Field debug strings that are
+/// used to communicate type information in error messages and plan explain
+/// renderings.
+pub fn format_type_and_metadata(
+    data_type: &DataType,
+    metadata: Option<&std::collections::HashMap<String, String>>,
+) -> String {

Review Comment:
   This keeps the `LogicalType::Statement(Prepared {..})` output in the plan 
text from dumping the `Field` debug representation, so that output stays mostly 
unchanged (unless there is, in fact, metadata). It also helps with the error 
message above.
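   
   A sketch of the intent as I read it (the function body isn't shown in this 
hunk, so the expected behavior in the comments is an assumption):
   
   ```rust
   use std::collections::HashMap;
   
   use arrow::datatypes::DataType;
   use datafusion_common::metadata::format_type_and_metadata;
   
   fn main() {
       // With no metadata this should render like the bare DataType, keeping
       // existing plan text unchanged...
       println!("{}", format_type_and_metadata(&DataType::Utf8, None));
   
       // ...and only when metadata is present does the rendering grow, without
       // falling back to the full `Field` debug output.
       let md = HashMap::from([(
           "ARROW:extension:name".to_string(),
           "arrow.uuid".to_string(),
       )]);
       println!(
           "{}",
           format_type_and_metadata(&DataType::FixedSizeBinary(16), Some(&md))
       );
   }
   ```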



##########
datafusion/expr/src/expr_schema.rs:
##########
@@ -433,18 +436,18 @@ impl ExprSchemable for Expr {
                 ..
             }) => {
                 let field = match &**expr {
-                    Expr::Placeholder(Placeholder { data_type, .. }) => {
-                        match &data_type {
-                            None => schema
-                                .data_type_and_nullable(&Column::from_name(name))
-                                .map(|(d, n)| Field::new(&schema_name, d.clone(), n)),
-                            Some(dt) => Ok(Field::new(
-                                &schema_name,
-                                dt.clone(),
-                                expr.nullable(schema)?,
-                            )),
-                        }
-                    }
+                    Expr::Placeholder(Placeholder { field, .. }) => match &field {
+                        // TODO: This seems to be pulling metadata/types from the schema
+                        // based on the Alias destination name? Is this correct?
+                        None => schema
+                            .field_from_column(&Column::from_name(name))
+                            .map(|f| f.clone().with_name(&schema_name)),

Review Comment:
   I don't understand this first branch and why it would pull metadata from a 
schema based on the destination name?
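   
   To make sure I'm reading the scenario right, a toy version of the lookup for 
an untyped placeholder aliased as `user_id` (e.g. `SELECT $1 AS user_id FROM t`):
   
   ```rust
   use arrow::datatypes::{DataType, Field, Schema};
   
   fn main() {
       // Input schema for the hypothetical table `t`.
       let schema = Schema::new(vec![Field::new("user_id", DataType::Int64, true)]);
   
       // The branch resolves the placeholder's field by looking up the alias
       // name in the schema, adopting that column's type (and now metadata).
       let inferred = schema.field_with_name("user_id").unwrap();
       assert_eq!(inferred.data_type(), &DataType::Int64);
   }
   ```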



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

