This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0589dbb28c fix: describe escaped quoted identifiers (#16082)
0589dbb28c is described below
commit 0589dbb28c6a26a589cd13bb2d1fe569f914642d
Author: Joseph Fahnestock <[email protected]>
AuthorDate: Wed May 21 11:39:30 2025 -0400
fix: describe escaped quoted identifiers (#16082)
* feat: escape quote wrap identifiers in describe
rm: dev files
fmt: final formatting
sed: s/<comment>//
* fix: use ident instead of col + format
---
datafusion/core/src/dataframe/mod.rs | 18 ++++++------
datafusion/core/tests/dataframe/mod.rs | 50 ++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 9 deletions(-)
diff --git a/datafusion/core/src/dataframe/mod.rs b/datafusion/core/src/dataframe/mod.rs
index e385125692..a67c871e4c 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -33,8 +33,8 @@ use crate::execution::context::{SessionState, TaskContext};
use crate::execution::FunctionRegistry;
use crate::logical_expr::utils::find_window_exprs;
use crate::logical_expr::{
- col, Expr, JoinType, LogicalPlan, LogicalPlanBuilder, LogicalPlanBuilderOptions,
- Partitioning, TableType,
+ col, ident, Expr, JoinType, LogicalPlan, LogicalPlanBuilder,
+ LogicalPlanBuilderOptions, Partitioning, TableType,
};
use crate::physical_plan::{
collect, collect_partitioned, execute_stream, execute_stream_partitioned,
@@ -934,7 +934,7 @@ impl DataFrame {
vec![],
original_schema_fields
.clone()
- .map(|f| count(col(f.name())).alias(f.name()))
+ .map(|f| count(ident(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
),
// null_count aggregation
@@ -943,7 +943,7 @@ impl DataFrame {
original_schema_fields
.clone()
.map(|f| {
- sum(case(is_null(col(f.name())))
+ sum(case(is_null(ident(f.name())))
.when(lit(true), lit(1))
.otherwise(lit(0))
.unwrap())
@@ -957,7 +957,7 @@ impl DataFrame {
original_schema_fields
.clone()
.filter(|f| f.data_type().is_numeric())
- .map(|f| avg(col(f.name())).alias(f.name()))
+ .map(|f| avg(ident(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
),
// std aggregation
@@ -966,7 +966,7 @@ impl DataFrame {
original_schema_fields
.clone()
.filter(|f| f.data_type().is_numeric())
- .map(|f| stddev(col(f.name())).alias(f.name()))
+ .map(|f| stddev(ident(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
),
// min aggregation
@@ -977,7 +977,7 @@ impl DataFrame {
.filter(|f| {
!matches!(f.data_type(), DataType::Binary | DataType::Boolean)
})
- .map(|f| min(col(f.name())).alias(f.name()))
+ .map(|f| min(ident(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
),
// max aggregation
@@ -988,7 +988,7 @@ impl DataFrame {
.filter(|f| {
!matches!(f.data_type(), DataType::Binary | DataType::Boolean)
})
- .map(|f| max(col(f.name())).alias(f.name()))
+ .map(|f| max(ident(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
),
// median aggregation
@@ -997,7 +997,7 @@ impl DataFrame {
original_schema_fields
.clone()
.filter(|f| f.data_type().is_numeric())
- .map(|f| median(col(f.name())).alias(f.name()))
+ .map(|f| median(ident(f.name())).alias(f.name()))
.collect::<Vec<_>>(),
),
];
diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs
index 827808d923..d0692d6231 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -1852,6 +1852,56 @@ async fn with_column_renamed_case_sensitive() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn describe_lookup_via_quoted_identifier() -> Result<()> {
+ let ctx = SessionContext::new();
+ let name = "aggregate_test_100";
+ register_aggregate_csv(&ctx, name).await?;
+ let df = ctx.table(name);
+
+ let df = df
+ .await?
+ .filter(col("c2").eq(lit(3)).and(col("c1").eq(lit("a"))))?
+ .limit(0, Some(1))?
+ .sort(vec![
+ // make the test deterministic
+ col("c1").sort(true, true),
+ col("c2").sort(true, true),
+ col("c3").sort(true, true),
+ ])?
+ .select_columns(&["c1"])?;
+
+ let df_renamed = df.clone().with_column_renamed("c1", "CoLu.Mn[\"1\"]")?;
+
+ let describe_result = df_renamed.describe().await?;
+ describe_result
+ .clone()
+ .sort(vec![
+ col("describe").sort(true, true),
+ col("CoLu.Mn[\"1\"]").sort(true, true),
+ ])?
+ .show()
+ .await?;
+ assert_snapshot!(
+ batches_to_sort_string(&describe_result.clone().collect().await?),
+ @r###"
+ +------------+--------------+
+ | describe | CoLu.Mn["1"] |
+ +------------+--------------+
+ | count | 1 |
+ | max | a |
+ | mean | null |
+ | median | null |
+ | min | a |
+ | null_count | 0 |
+ | std | null |
+ +------------+--------------+
+ "###
+ );
+
+ Ok(())
+}
+
#[tokio::test]
async fn cast_expr_test() -> Result<()> {
let df = test_table()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]