This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch branch-51
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/branch-51 by this push:
new 187c6b25f5 [branch-51] bugfix: select_columns should validate column
names (#18624)
187c6b25f5 is described below
commit 187c6b25f56dcd8f6f1ac2edf71d4e4e18a44e2c
Author: Tim Saucer <[email protected]>
AuthorDate: Wed Nov 12 08:35:57 2025 -0500
[branch-51] bugfix: select_columns should validate column names (#18624)
This is a duplicate of https://github.com/apache/datafusion/pull/18623,
but targeting branch-51.
See that PR for details.
---
datafusion/core/src/dataframe/mod.rs | 19 ++++++++++++++-----
datafusion/core/tests/dataframe/mod.rs | 23 ++++++++++++++++++++++-
2 files changed, 36 insertions(+), 6 deletions(-)
diff --git a/datafusion/core/src/dataframe/mod.rs
b/datafusion/core/src/dataframe/mod.rs
index b43c9c671f..aa378d4262 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -52,8 +52,8 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
use datafusion_common::config::{CsvOptions, JsonOptions};
use datafusion_common::{
exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err,
plan_err,
- Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError,
- TableReference, UnnestOptions,
+ unqualified_field_not_found, Column, DFSchema, DataFusionError,
ParamValues,
+ ScalarValue, SchemaError, TableReference, UnnestOptions,
};
use datafusion_expr::select_expr::SelectExpr;
use datafusion_expr::{
@@ -310,11 +310,20 @@ impl DataFrame {
pub fn select_columns(self, columns: &[&str]) -> Result<DataFrame> {
let fields = columns
.iter()
- .flat_map(|name| {
- self.plan
+ .map(|name| {
+ let fields = self
+ .plan
.schema()
- .qualified_fields_with_unqualified_name(name)
+ .qualified_fields_with_unqualified_name(name);
+ if fields.is_empty() {
+ Err(unqualified_field_not_found(name, self.plan.schema()))
+ } else {
+ Ok(fields)
+ }
})
+ .collect::<Result<Vec<_>, _>>()?
+ .into_iter()
+ .flatten()
.collect::<Vec<_>>();
let expr: Vec<Expr> = fields
.into_iter()
diff --git a/datafusion/core/tests/dataframe/mod.rs
b/datafusion/core/tests/dataframe/mod.rs
index e221edb7c5..4d52345a2a 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -67,7 +67,7 @@ use datafusion_catalog::TableProvider;
use datafusion_common::test_util::{batches_to_sort_string, batches_to_string};
use datafusion_common::{
assert_contains, internal_datafusion_err, Constraint, Constraints,
DFSchema,
- DataFusionError, ScalarValue, TableReference, UnnestOptions,
+ DataFusionError, ScalarValue, SchemaError, TableReference, UnnestOptions,
};
use datafusion_common_runtime::SpawnedTask;
use datafusion_datasource::file_format::format_as_file_type;
@@ -305,6 +305,27 @@ async fn select_columns() -> Result<()> {
Ok(())
}
+#[tokio::test]
+async fn select_columns_with_nonexistent_columns() -> Result<()> {
+ let t = test_table().await?;
+ let t2 = t.select_columns(&["canada", "c2", "rocks"]);
+
+ match t2 {
+ Err(DataFusionError::SchemaError(boxed_err, _)) => {
+ // Verify it's the first invalid column
+ match boxed_err.as_ref() {
+ SchemaError::FieldNotFound { field, .. } => {
+ assert_eq!(field.name(), "canada");
+ }
+ _ => panic!("Expected SchemaError::FieldNotFound for
'canada'"),
+ }
+ }
+ _ => panic!("Expected SchemaError"),
+ }
+
+ Ok(())
+}
+
#[tokio::test]
async fn select_expr() -> Result<()> {
// build plan using Table API
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]