This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 33b9afa911 Allow SQL `TypePlanner` to plan SQL types as extension 
types (#20676)
33b9afa911 is described below

commit 33b9afa911f7d01edac0053beb17ca298111ea19
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Mar 9 15:46:51 2026 -0500

    Allow SQL `TypePlanner` to plan SQL types as extension types (#20676)
    
    ## Which issue does this PR close?
    
    - Closes #20675
    
    ## Rationale for this change
    
    The `SQLDataType` enum has a number of members that have canonical
    Arrow extension type equivalents; however, the `TypePlanner` trait
    only supports returning `DataType` (which cannot represent an Arrow
    extension type).
    
    This will be substantially more useful after
    https://github.com/apache/datafusion/pull/18136, as the SQL planner
    inserts casts in a number of places (and currently the extension
    metadata of those casts is dropped when the logical cast is created).
    
    ## What changes are included in this PR?
    
    This PR adds a `fn plan_type_field()` member to the `TypePlanner` trait
    and deprecates `plan_type()` in its favor. The only call site of the
    previously existing `plan_type()` member was already wrapping its
    result in a `FieldRef`, so few other changes were needed.
    
    ## Are these changes tested?
    
    Yes. A UUID case was added to `test_custom_type_plan` in
    `datafusion/sql/tests/sql_integration.rs`.
    
    ## Are there any user-facing changes?
    
    Existing `TypePlanner`s will continue to work because the default
    `plan_type_field()` implementation falls back on `plan_type()`. An
    example was added for supporting the UUID type.
---
 datafusion/core/src/execution/context/mod.rs    |  8 +++-
 datafusion/expr/src/planner.rs                  | 18 +++++++++
 datafusion/sql/src/expr/mod.rs                  | 19 +++------
 datafusion/sql/src/planner.rs                   |  4 +-
 datafusion/sql/tests/common/mod.rs              | 16 +++++++-
 datafusion/sql/tests/sql_integration.rs         |  8 ++++
 docs/source/library-user-guide/extending-sql.md | 51 +++++++++++++++++++++++--
 7 files changed, 101 insertions(+), 23 deletions(-)

diff --git a/datafusion/core/src/execution/context/mod.rs 
b/datafusion/core/src/execution/context/mod.rs
index cdc50167d1..58d433e7dd 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -2218,7 +2218,9 @@ mod tests {
     use crate::test;
     use crate::test_util::{plan_and_collect, populate_csv_partitions};
     use arrow::datatypes::{DataType, TimeUnit};
+    use arrow_schema::FieldRef;
     use datafusion_common::DataFusionError;
+    use datafusion_common::datatype::DataTypeExt;
     use std::error::Error;
     use std::path::PathBuf;
 
@@ -2735,7 +2737,7 @@ mod tests {
     struct MyTypePlanner {}
 
     impl TypePlanner for MyTypePlanner {
-        fn plan_type(&self, sql_type: &ast::DataType) -> 
Result<Option<DataType>> {
+        fn plan_type_field(&self, sql_type: &ast::DataType) -> 
Result<Option<FieldRef>> {
             match sql_type {
                 ast::DataType::Datetime(precision) => {
                     let precision = match precision {
@@ -2745,7 +2747,9 @@ mod tests {
                         None | Some(9) => TimeUnit::Nanosecond,
                         _ => unreachable!(),
                     };
-                    Ok(Some(DataType::Timestamp(precision, None)))
+                    Ok(Some(
+                        DataType::Timestamp(precision, 
None).into_nullable_field_ref(),
+                    ))
                 }
                 _ => Ok(None),
             }
diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs
index 837a9eefe2..197ac8c035 100644
--- a/datafusion/expr/src/planner.rs
+++ b/datafusion/expr/src/planner.rs
@@ -434,10 +434,28 @@ pub trait TypePlanner: Debug + Send + Sync {
     /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
     ///
     /// Returns None if not possible
+    #[deprecated(since = "53.0.0", note = "Use plan_type_field()")]
     fn plan_type(
         &self,
         _sql_type: &sqlparser::ast::DataType,
     ) -> Result<Option<DataType>> {
         Ok(None)
     }
+
+    /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`]
+    ///
+    /// Returns None if not possible. Unlike [`Self::plan_type`], 
`plan_type_field()`
+    /// makes it possible to express extension types (e.g., `arrow.uuid`) or 
otherwise
+    /// insert metadata into the DataFusion type representation. The default 
implementation
+    /// falls back on [`Self::plan_type`] for backward compatibility and wraps 
the result
+    /// in a nullable field reference.
+    fn plan_type_field(
+        &self,
+        sql_type: &sqlparser::ast::DataType,
+    ) -> Result<Option<FieldRef>> {
+        #[expect(deprecated)]
+        Ok(self
+            .plan_type(sql_type)?
+            .map(|data_type| data_type.into_nullable_field_ref()))
+    }
 }
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 7902eed1e6..cd42b68949 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -292,15 +292,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                     return not_impl_err!("CAST with format is not supported: 
{format}");
                 }
 
-                Ok(Expr::TryCast(TryCast::new(
+                Ok(Expr::TryCast(TryCast::new_from_field(
                     Box::new(self.sql_expr_to_logical_expr(
                         *expr,
                         schema,
                         planner_context,
                     )?),
-                    self.convert_data_type_to_field(&data_type)?
-                        .data_type()
-                        .clone(),
+                    self.convert_data_type_to_field(&data_type)?,
                 )))
             }
 
@@ -308,11 +306,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
                 data_type,
                 value,
                 uses_odbc_syntax: _,
-            }) => Ok(Expr::Cast(Cast::new(
+            }) => Ok(Expr::Cast(Cast::new_from_field(
                 Box::new(lit(value.into_string().unwrap())),
-                self.convert_data_type_to_field(&data_type)?
-                    .data_type()
-                    .clone(),
+                self.convert_data_type_to_field(&data_type)?,
             ))),
 
             SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
@@ -1061,12 +1057,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
             _ => expr,
         };
 
-        // Currently drops metadata attached to the type
-        // https://github.com/apache/datafusion/issues/18060
-        Ok(Expr::Cast(Cast::new(
-            Box::new(expr),
-            dt.data_type().clone(),
-        )))
+        Ok(Expr::Cast(Cast::new_from_field(Box::new(expr), dt)))
     }
 
     /// Extracts the root expression and access chain from a compound 
expression.
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 307f28e8ff..b7e270e4f0 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -621,9 +621,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
     ) -> Result<FieldRef> {
         // First check if any of the registered type_planner can handle this 
type
         if let Some(type_planner) = self.context_provider.get_type_planner()
-            && let Some(data_type) = type_planner.plan_type(sql_type)?
+            && let Some(data_type) = type_planner.plan_type_field(sql_type)?
         {
-            return Ok(data_type.into_nullable_field_ref());
+            return Ok(data_type);
         }
 
         // If no type_planner can handle this type, use the default conversion
diff --git a/datafusion/sql/tests/common/mod.rs 
b/datafusion/sql/tests/common/mod.rs
index 4b8667c3c0..5caade3002 100644
--- a/datafusion/sql/tests/common/mod.rs
+++ b/datafusion/sql/tests/common/mod.rs
@@ -23,6 +23,7 @@ use std::{sync::Arc, vec};
 
 use arrow::datatypes::*;
 use datafusion_common::config::ConfigOptions;
+use datafusion_common::datatype::DataTypeExt;
 use datafusion_common::file_options::file_type::FileType;
 use datafusion_common::{DFSchema, GetExt, Result, TableReference, plan_err};
 use datafusion_expr::planner::{ExprPlanner, PlannerResult, TypePlanner};
@@ -341,8 +342,17 @@ impl TableSource for EmptyTable {
 pub struct CustomTypePlanner {}
 
 impl TypePlanner for CustomTypePlanner {
-    fn plan_type(&self, sql_type: &sqlparser::ast::DataType) -> 
Result<Option<DataType>> {
+    fn plan_type_field(
+        &self,
+        sql_type: &sqlparser::ast::DataType,
+    ) -> Result<Option<FieldRef>> {
         match sql_type {
+            sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
+                Field::new("", DataType::FixedSizeBinary(16), 
true).with_metadata(
+                    [("ARROW:extension:name".to_string(), 
"arrow.uuid".to_string())]
+                        .into(),
+                ),
+            ))),
             sqlparser::ast::DataType::Datetime(precision) => {
                 let precision = match precision {
                     Some(0) => TimeUnit::Second,
@@ -351,7 +361,9 @@ impl TypePlanner for CustomTypePlanner {
                     None | Some(9) => TimeUnit::Nanosecond,
                     _ => unreachable!(),
                 };
-                Ok(Some(DataType::Timestamp(precision, None)))
+                Ok(Some(
+                    DataType::Timestamp(precision, 
None).into_nullable_field_ref(),
+                ))
             }
             _ => Ok(None),
         }
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 9570336e99..29c17be69c 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -4727,6 +4727,14 @@ fn test_custom_type_plan() -> Result<()> {
     "#
     );
 
+    let plan = plan_sql("SELECT UUID '00010203-0405-0607-0809-000102030506'");
+    assert_snapshot!(
+        plan,
+        @r#"
+    Projection: CAST(Utf8("00010203-0405-0607-0809-000102030506") AS 
FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>)
+      EmptyRelation: rows=1
+    "#
+    );
     Ok(())
 }
 
diff --git a/docs/source/library-user-guide/extending-sql.md 
b/docs/source/library-user-guide/extending-sql.md
index 687d884895..eea5b3b1ac 100644
--- a/docs/source/library-user-guide/extending-sql.md
+++ b/docs/source/library-user-guide/extending-sql.md
@@ -158,7 +158,7 @@ when you need to support SQL types that aren't natively 
recognized.
 
 ```rust
 # use std::sync::Arc;
-# use arrow::datatypes::{DataType, TimeUnit};
+# use arrow::datatypes::{DataType, FieldRef, TimeUnit};
 # use datafusion::error::Result;
 # use datafusion::prelude::*;
 # use datafusion::execution::SessionStateBuilder;
@@ -169,7 +169,7 @@ use datafusion_expr::planner::TypePlanner;
 struct MyTypePlanner;
 
 impl TypePlanner for MyTypePlanner {
-    fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
+    fn plan_type_field(&self, sql_type: &ast::DataType) -> 
Result<Option<FieldRef>> {
         match sql_type {
             // Map DATETIME(precision) to Arrow Timestamp
             ast::DataType::Datetime(precision) => {
@@ -180,7 +180,9 @@ impl TypePlanner for MyTypePlanner {
                     None | Some(9) => TimeUnit::Nanosecond,
                     _ => return Ok(None), // Let default handling take over
                 };
-                Ok(Some(DataType::Timestamp(time_unit, None)))
+                Ok(Some(
+                    DataType::Timestamp(time_unit, 
None).into_nullable_field_ref()
+                ))
             }
             _ => Ok(None), // Return None for types we don't handle
         }
@@ -202,6 +204,49 @@ async fn main() -> Result<()> {
 }
 ```
 
+#### Example: Supporting the UUID Type
+
+```rust
+# use std::sync::Arc;
+# use arrow::datatypes::{DataType, FieldRef, TimeUnit};
+# use datafusion::error::Result;
+# use datafusion::prelude::*;
+# use datafusion::execution::SessionStateBuilder;
+use datafusion_expr::planner::TypePlanner;
+# use sqlparser::ast;
+
+#[derive(Debug)]
+struct MyTypePlanner;
+
+impl TypePlanner for MyTypePlanner {
+    fn plan_type_field(&self, sql_type: &ast::DataType) -> 
Result<Option<FieldRef>> {
+        match sql_type {
+            sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
+                Field::new("", DataType::FixedSizeBinary(16), 
true).with_metadata(
+                    [("ARROW:extension:name".to_string(), 
"arrow.uuid".to_string())]
+                        .into(),
+                ),
+            ))),
+            _ => Ok(None),
+        }
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let state = SessionStateBuilder::new()
+        .with_default_features()
+        .with_type_planner(Arc::new(MyTypePlanner))
+        .build();
+
+    let ctx = SessionContext::new_with_state(state);
+
+    // Now UUID type is recognized
+    ctx.sql("CREATE TABLE idx (uuid UUID)").await?;
+    Ok(())
+}
+```
+
 For more details, see the [TypePlanner API documentation].
 
 ### RelationPlanner: Custom FROM Clause Elements


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to