This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 33b9afa911 Allow SQL `TypePlanner` to plan SQL types as extension
types (#20676)
33b9afa911 is described below
commit 33b9afa911f7d01edac0053beb17ca298111ea19
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Mar 9 15:46:51 2026 -0500
Allow SQL `TypePlanner` to plan SQL types as extension types (#20676)
## Which issue does this PR close?
- Closes #20675
## Rationale for this change
The `SQLDataType` enum has a number of members that have canonical
Arrow extension type equivalents; however, the `TypePlanner` trait
only supports returning `DataType` (which cannot represent an Arrow
extension type).
This will be substantially more useful after
https://github.com/apache/datafusion/pull/18136, as the SQL planner
inserts casts in a number of places (and currently the extension
metadata of those casts is dropped when the logical cast is created).
## What changes are included in this PR?
This PR adds a `fn plan_type_field()` member to the `TypePlanner` trait
and deprecates `plan_type()` in its favor. The only call site of the
previously existing `plan_type()` member already wrapped its result in a
`FieldRef`, so few other changes were needed.
## Are these changes tested?
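Yes: `test_custom_type_plan` in `datafusion/sql/tests/sql_integration.rs`
now also plans a `UUID` literal using the extended `CustomTypePlanner`.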
## Are there any user-facing changes?
Existing `TypePlanner`s will continue to work; `plan_type()` is now marked
deprecated in favor of `plan_type_field()`. An example for supporting the
UUID type was added to the user guide.
---
datafusion/core/src/execution/context/mod.rs | 8 +++-
datafusion/expr/src/planner.rs | 18 +++++++++
datafusion/sql/src/expr/mod.rs | 19 +++------
datafusion/sql/src/planner.rs | 4 +-
datafusion/sql/tests/common/mod.rs | 16 +++++++-
datafusion/sql/tests/sql_integration.rs | 8 ++++
docs/source/library-user-guide/extending-sql.md | 51 +++++++++++++++++++++++--
7 files changed, 101 insertions(+), 23 deletions(-)
diff --git a/datafusion/core/src/execution/context/mod.rs
b/datafusion/core/src/execution/context/mod.rs
index cdc50167d1..58d433e7dd 100644
--- a/datafusion/core/src/execution/context/mod.rs
+++ b/datafusion/core/src/execution/context/mod.rs
@@ -2218,7 +2218,9 @@ mod tests {
use crate::test;
use crate::test_util::{plan_and_collect, populate_csv_partitions};
use arrow::datatypes::{DataType, TimeUnit};
+ use arrow_schema::FieldRef;
use datafusion_common::DataFusionError;
+ use datafusion_common::datatype::DataTypeExt;
use std::error::Error;
use std::path::PathBuf;
@@ -2735,7 +2737,7 @@ mod tests {
struct MyTypePlanner {}
impl TypePlanner for MyTypePlanner {
- fn plan_type(&self, sql_type: &ast::DataType) ->
Result<Option<DataType>> {
+ fn plan_type_field(&self, sql_type: &ast::DataType) ->
Result<Option<FieldRef>> {
match sql_type {
ast::DataType::Datetime(precision) => {
let precision = match precision {
@@ -2745,7 +2747,9 @@ mod tests {
None | Some(9) => TimeUnit::Nanosecond,
_ => unreachable!(),
};
- Ok(Some(DataType::Timestamp(precision, None)))
+ Ok(Some(
+ DataType::Timestamp(precision,
None).into_nullable_field_ref(),
+ ))
}
_ => Ok(None),
}
diff --git a/datafusion/expr/src/planner.rs b/datafusion/expr/src/planner.rs
index 837a9eefe2..197ac8c035 100644
--- a/datafusion/expr/src/planner.rs
+++ b/datafusion/expr/src/planner.rs
@@ -434,10 +434,28 @@ pub trait TypePlanner: Debug + Send + Sync {
/// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`DataType`]
///
/// Returns None if not possible
+ #[deprecated(since = "53.0.0", note = "Use plan_type_field()")]
fn plan_type(
&self,
_sql_type: &sqlparser::ast::DataType,
) -> Result<Option<DataType>> {
Ok(None)
}
+
+ /// Plan SQL [`sqlparser::ast::DataType`] to DataFusion [`FieldRef`]
+ ///
+ /// Returns None if not possible. Unlike [`Self::plan_type`], `plan_type_field()`
+ /// makes it possible to express extension types (e.g., `arrow.uuid`) or otherwise
+ /// insert metadata into the DataFusion type representation. The default implementation
+ /// falls back on [`Self::plan_type`] for backward compatibility and wraps the result
+ /// in a nullable field reference.
+ fn plan_type_field(
+ &self,
+ sql_type: &sqlparser::ast::DataType,
+ ) -> Result<Option<FieldRef>> {
+ // Deliberately call the deprecated `plan_type()`: planners that only
+ // implement the old method keep working, with the result wrapped in a
+ // nullable field.
+ #[expect(deprecated)]
+ Ok(self
+ .plan_type(sql_type)?
+ .map(|data_type| data_type.into_nullable_field_ref()))
+ }
}
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index 7902eed1e6..cd42b68949 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -292,15 +292,13 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
return not_impl_err!("CAST with format is not supported:
{format}");
}
- Ok(Expr::TryCast(TryCast::new(
+ Ok(Expr::TryCast(TryCast::new_from_field(
Box::new(self.sql_expr_to_logical_expr(
*expr,
schema,
planner_context,
)?),
- self.convert_data_type_to_field(&data_type)?
- .data_type()
- .clone(),
+ self.convert_data_type_to_field(&data_type)?,
)))
}
@@ -308,11 +306,9 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
data_type,
value,
uses_odbc_syntax: _,
- }) => Ok(Expr::Cast(Cast::new(
+ }) => Ok(Expr::Cast(Cast::new_from_field(
Box::new(lit(value.into_string().unwrap())),
- self.convert_data_type_to_field(&data_type)?
- .data_type()
- .clone(),
+ self.convert_data_type_to_field(&data_type)?,
))),
SQLExpr::IsNull(expr) => Ok(Expr::IsNull(Box::new(
@@ -1061,12 +1057,7 @@ impl<S: ContextProvider> SqlToRel<'_, S> {
_ => expr,
};
- // Currently drops metadata attached to the type
- // https://github.com/apache/datafusion/issues/18060
- Ok(Expr::Cast(Cast::new(
- Box::new(expr),
- dt.data_type().clone(),
- )))
+ Ok(Expr::Cast(Cast::new_from_field(Box::new(expr), dt)))
}
/// Extracts the root expression and access chain from a compound
expression.
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 307f28e8ff..b7e270e4f0 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -621,9 +621,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
) -> Result<FieldRef> {
// First check if any of the registered type_planner can handle this
type
if let Some(type_planner) = self.context_provider.get_type_planner()
- && let Some(data_type) = type_planner.plan_type(sql_type)?
+ && let Some(data_type) = type_planner.plan_type_field(sql_type)?
{
- return Ok(data_type.into_nullable_field_ref());
+ return Ok(data_type);
}
// If no type_planner can handle this type, use the default conversion
diff --git a/datafusion/sql/tests/common/mod.rs
b/datafusion/sql/tests/common/mod.rs
index 4b8667c3c0..5caade3002 100644
--- a/datafusion/sql/tests/common/mod.rs
+++ b/datafusion/sql/tests/common/mod.rs
@@ -23,6 +23,7 @@ use std::{sync::Arc, vec};
use arrow::datatypes::*;
use datafusion_common::config::ConfigOptions;
+use datafusion_common::datatype::DataTypeExt;
use datafusion_common::file_options::file_type::FileType;
use datafusion_common::{DFSchema, GetExt, Result, TableReference, plan_err};
use datafusion_expr::planner::{ExprPlanner, PlannerResult, TypePlanner};
@@ -341,8 +342,17 @@ impl TableSource for EmptyTable {
pub struct CustomTypePlanner {}
impl TypePlanner for CustomTypePlanner {
- fn plan_type(&self, sql_type: &sqlparser::ast::DataType) ->
Result<Option<DataType>> {
+ fn plan_type_field(
+ &self,
+ sql_type: &sqlparser::ast::DataType,
+ ) -> Result<Option<FieldRef>> {
match sql_type {
+ sqlparser::ast::DataType::Uuid => Ok(Some(Arc::new(
+ Field::new("", DataType::FixedSizeBinary(16),
true).with_metadata(
+ [("ARROW:extension:name".to_string(),
"arrow.uuid".to_string())]
+ .into(),
+ ),
+ ))),
sqlparser::ast::DataType::Datetime(precision) => {
let precision = match precision {
Some(0) => TimeUnit::Second,
@@ -351,7 +361,9 @@ impl TypePlanner for CustomTypePlanner {
None | Some(9) => TimeUnit::Nanosecond,
_ => unreachable!(),
};
- Ok(Some(DataType::Timestamp(precision, None)))
+ Ok(Some(
+ DataType::Timestamp(precision,
None).into_nullable_field_ref(),
+ ))
}
_ => Ok(None),
}
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 9570336e99..29c17be69c 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -4727,6 +4727,14 @@ fn test_custom_type_plan() -> Result<()> {
"#
);
+ let plan = plan_sql("SELECT UUID '00010203-0405-0607-0809-000102030506'");
+ assert_snapshot!(
+ plan,
+ @r#"
+ Projection: CAST(Utf8("00010203-0405-0607-0809-000102030506") AS
FixedSizeBinary(16)<{"ARROW:extension:name": "arrow.uuid"}>)
+ EmptyRelation: rows=1
+ "#
+ );
Ok(())
}
diff --git a/docs/source/library-user-guide/extending-sql.md
b/docs/source/library-user-guide/extending-sql.md
index 687d884895..eea5b3b1ac 100644
--- a/docs/source/library-user-guide/extending-sql.md
+++ b/docs/source/library-user-guide/extending-sql.md
@@ -158,7 +158,7 @@ when you need to support SQL types that aren't natively
recognized.
```rust
# use std::sync::Arc;
-# use arrow::datatypes::{DataType, TimeUnit};
+# use arrow::datatypes::{DataType, FieldRef, TimeUnit};
# use datafusion::error::Result;
# use datafusion::prelude::*;
# use datafusion::execution::SessionStateBuilder;
@@ -169,7 +169,7 @@ use datafusion_expr::planner::TypePlanner;
struct MyTypePlanner;
impl TypePlanner for MyTypePlanner {
- fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
+ fn plan_type_field(&self, sql_type: &ast::DataType) ->
Result<Option<FieldRef>> {
match sql_type {
// Map DATETIME(precision) to Arrow Timestamp
ast::DataType::Datetime(precision) => {
@@ -180,7 +180,9 @@ impl TypePlanner for MyTypePlanner {
None | Some(9) => TimeUnit::Nanosecond,
_ => return Ok(None), // Let default handling take over
};
- Ok(Some(DataType::Timestamp(time_unit, None)))
+ Ok(Some(
+ DataType::Timestamp(time_unit,
None).into_nullable_field_ref()
+ ))
}
_ => Ok(None), // Return None for types we don't handle
}
@@ -202,6 +204,49 @@ async fn main() -> Result<()> {
}
```
+#### Example: Supporting the UUID Type
+
+```rust
+# use std::sync::Arc;
+# use arrow::datatypes::{DataType, Field, FieldRef};
+# use datafusion::error::Result;
+# use datafusion::prelude::*;
+# use datafusion::execution::SessionStateBuilder;
+use datafusion_expr::planner::TypePlanner;
+# use sqlparser::ast;
+
+#[derive(Debug)]
+struct MyTypePlanner;
+
+impl TypePlanner for MyTypePlanner {
+    fn plan_type_field(&self, sql_type: &ast::DataType) -> Result<Option<FieldRef>> {
+        match sql_type {
+            // Map UUID to the `arrow.uuid` extension type: 16-byte fixed-size
+            // binary storage, tagged through the field's extension metadata
+            ast::DataType::Uuid => Ok(Some(Arc::new(
+                Field::new("", DataType::FixedSizeBinary(16), true).with_metadata(
+                    [("ARROW:extension:name".to_string(), "arrow.uuid".to_string())]
+                        .into(),
+                ),
+            ))),
+            _ => Ok(None), // Return None for types we don't handle
+        }
+    }
+}
+
+#[tokio::main]
+async fn main() -> Result<()> {
+    let state = SessionStateBuilder::new()
+        .with_default_features()
+        .with_type_planner(Arc::new(MyTypePlanner))
+        .build();
+
+    let ctx = SessionContext::new_with_state(state);
+
+    // Now UUID type is recognized
+    ctx.sql("CREATE TABLE idx (uuid UUID)").await?;
+    Ok(())
+}
+```
+
For more details, see the [TypePlanner API documentation].
### RelationPlanner: Custom FROM Clause Elements
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]