This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch branch-51
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/branch-51 by this push:
     new 187c6b25f5  [branch-51] bugfix: select_columns should validate column 
names (#18624)
187c6b25f5 is described below

commit 187c6b25f56dcd8f6f1ac2edf71d4e4e18a44e2c
Author: Tim Saucer <[email protected]>
AuthorDate: Wed Nov 12 08:35:57 2025 -0500

     [branch-51] bugfix: select_columns should validate column names (#18624)
    
    This is a duplicate of https://github.com/apache/datafusion/pull/18623
    but targeting branch51
    
    See that PR for details.
---
 datafusion/core/src/dataframe/mod.rs   | 19 ++++++++++++++-----
 datafusion/core/tests/dataframe/mod.rs | 23 ++++++++++++++++++++++-
 2 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/datafusion/core/src/dataframe/mod.rs 
b/datafusion/core/src/dataframe/mod.rs
index b43c9c671f..aa378d4262 100644
--- a/datafusion/core/src/dataframe/mod.rs
+++ b/datafusion/core/src/dataframe/mod.rs
@@ -52,8 +52,8 @@ use arrow::datatypes::{DataType, Field, Schema, SchemaRef};
 use datafusion_common::config::{CsvOptions, JsonOptions};
 use datafusion_common::{
     exec_err, internal_datafusion_err, not_impl_err, plan_datafusion_err, 
plan_err,
-    Column, DFSchema, DataFusionError, ParamValues, ScalarValue, SchemaError,
-    TableReference, UnnestOptions,
+    unqualified_field_not_found, Column, DFSchema, DataFusionError, 
ParamValues,
+    ScalarValue, SchemaError, TableReference, UnnestOptions,
 };
 use datafusion_expr::select_expr::SelectExpr;
 use datafusion_expr::{
@@ -310,11 +310,20 @@ impl DataFrame {
     pub fn select_columns(self, columns: &[&str]) -> Result<DataFrame> {
         let fields = columns
             .iter()
-            .flat_map(|name| {
-                self.plan
+            .map(|name| {
+                let fields = self
+                    .plan
                     .schema()
-                    .qualified_fields_with_unqualified_name(name)
+                    .qualified_fields_with_unqualified_name(name);
+                if fields.is_empty() {
+                    Err(unqualified_field_not_found(name, self.plan.schema()))
+                } else {
+                    Ok(fields)
+                }
             })
+            .collect::<Result<Vec<_>, _>>()?
+            .into_iter()
+            .flatten()
             .collect::<Vec<_>>();
         let expr: Vec<Expr> = fields
             .into_iter()
diff --git a/datafusion/core/tests/dataframe/mod.rs 
b/datafusion/core/tests/dataframe/mod.rs
index e221edb7c5..4d52345a2a 100644
--- a/datafusion/core/tests/dataframe/mod.rs
+++ b/datafusion/core/tests/dataframe/mod.rs
@@ -67,7 +67,7 @@ use datafusion_catalog::TableProvider;
 use datafusion_common::test_util::{batches_to_sort_string, batches_to_string};
 use datafusion_common::{
     assert_contains, internal_datafusion_err, Constraint, Constraints, 
DFSchema,
-    DataFusionError, ScalarValue, TableReference, UnnestOptions,
+    DataFusionError, ScalarValue, SchemaError, TableReference, UnnestOptions,
 };
 use datafusion_common_runtime::SpawnedTask;
 use datafusion_datasource::file_format::format_as_file_type;
@@ -305,6 +305,27 @@ async fn select_columns() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn select_columns_with_nonexistent_columns() -> Result<()> {
+    let t = test_table().await?;
+    let t2 = t.select_columns(&["canada", "c2", "rocks"]);
+
+    match t2 {
+        Err(DataFusionError::SchemaError(boxed_err, _)) => {
+            // Verify it's the first invalid column
+            match boxed_err.as_ref() {
+                SchemaError::FieldNotFound { field, .. } => {
+                    assert_eq!(field.name(), "canada");
+                }
+                _ => panic!("Expected SchemaError::FieldNotFound for 
'canada'"),
+            }
+        }
+        _ => panic!("Expected SchemaError"),
+    }
+
+    Ok(())
+}
+
 #[tokio::test]
 async fn select_expr() -> Result<()> {
     // build plan using Table API


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to