This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 35adf47fdb Add support for external tables with qualified names 
(#12645)
35adf47fdb is described below

commit 35adf47fdbd626d79051799921146b96e3345e3b
Author: OussamaSaoudi <[email protected]>
AuthorDate: Tue Oct 1 15:29:47 2024 -0700

    Add support for external tables with qualified names (#12645)
    
    * Make  support schemas
    
    * Set default name to table
    
    * Remove print statements and stale comment
    
    * Add tests for create table
    
    * Fix typo
    
    * Update datafusion/sql/src/statement.rs
    
    Co-authored-by: Jonah Gao <[email protected]>
    
    * convert create_external_table to objectname
    
    * Add sqllogic tests
    
    * Fix failing tests
    
    ---------
    
    Co-authored-by: Jonah Gao <[email protected]>
---
 datafusion/core/src/catalog_common/mod.rs          |  4 +--
 datafusion/sql/src/parser.rs                       | 39 +++++++++++-----------
 datafusion/sql/src/statement.rs                    |  3 +-
 datafusion/sql/tests/sql_integration.rs            |  7 ++++
 .../test_files/create_external_table.slt           | 12 +++++++
 5 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/datafusion/core/src/catalog_common/mod.rs 
b/datafusion/core/src/catalog_common/mod.rs
index b841437886..85207845a0 100644
--- a/datafusion/core/src/catalog_common/mod.rs
+++ b/datafusion/core/src/catalog_common/mod.rs
@@ -185,9 +185,7 @@ pub fn resolve_table_references(
                 let _ = s.as_ref().visit(visitor);
             }
             DFStatement::CreateExternalTable(table) => {
-                visitor
-                    .relations
-                    
.insert(ObjectName(vec![Ident::from(table.name.as_str())]));
+                visitor.relations.insert(table.name.clone());
             }
             DFStatement::CopyTo(CopyToStatement { source, .. }) => match 
source {
                 CopyToSource::Relation(table_name) => {
diff --git a/datafusion/sql/src/parser.rs b/datafusion/sql/src/parser.rs
index 2df8d89c59..6d130647a4 100644
--- a/datafusion/sql/src/parser.rs
+++ b/datafusion/sql/src/parser.rs
@@ -181,7 +181,7 @@ pub(crate) type LexOrdering = Vec<OrderByExpr>;
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct CreateExternalTable {
     /// Table name
-    pub name: String,
+    pub name: ObjectName,
     /// Optional schema
     pub columns: Vec<ColumnDef>,
     /// File type (Parquet, NDJSON, CSV, etc)
@@ -813,7 +813,7 @@ impl<'a> DFParser<'a> {
         }
 
         let create = CreateExternalTable {
-            name: table_name.to_string(),
+            name: table_name,
             columns,
             file_type: builder.file_type.unwrap(),
             location: builder.location.unwrap(),
@@ -915,8 +915,9 @@ mod tests {
         // positive case
         let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 
'foo.csv'";
         let display = None;
+        let name = ObjectName(vec![Ident::from("t")]);
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![make_column_def("c1", DataType::Int(display))],
             file_type: "CSV".to_string(),
             location: "foo.csv".into(),
@@ -932,7 +933,7 @@ mod tests {
         // positive case: leading space
         let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 
'foo.csv'     ";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![make_column_def("c1", DataType::Int(None))],
             file_type: "CSV".to_string(),
             location: "foo.csv".into(),
@@ -949,7 +950,7 @@ mod tests {
         let sql =
             "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'  
    ;";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![make_column_def("c1", DataType::Int(None))],
             file_type: "CSV".to_string(),
             location: "foo.csv".into(),
@@ -966,7 +967,7 @@ mod tests {
         let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 
'foo.csv' OPTIONS (format.delimiter '|')";
         let display = None;
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![make_column_def("c1", DataType::Int(display))],
             file_type: "CSV".to_string(),
             location: "foo.csv".into(),
@@ -986,7 +987,7 @@ mod tests {
         let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED 
BY (p1, p2) LOCATION 'foo.csv'";
         let display = None;
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![make_column_def("c1", DataType::Int(display))],
             file_type: "CSV".to_string(),
             location: "foo.csv".into(),
@@ -1013,7 +1014,7 @@ mod tests {
          ];
         for (sql, compression) in sqls {
             let expected = Statement::CreateExternalTable(CreateExternalTable {
-                name: "t".into(),
+                name: name.clone(),
                 columns: vec![make_column_def("c1", DataType::Int(display))],
                 file_type: "CSV".to_string(),
                 location: "foo.csv".into(),
@@ -1033,7 +1034,7 @@ mod tests {
         // positive case: it is ok for parquet files not to have columns 
specified
         let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 
'foo.parquet'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![],
             file_type: "PARQUET".to_string(),
             location: "foo.parquet".into(),
@@ -1049,7 +1050,7 @@ mod tests {
         // positive case: it is ok for parquet files to be other than upper 
case
         let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 
'foo.parquet'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![],
             file_type: "PARQUET".to_string(),
             location: "foo.parquet".into(),
@@ -1065,7 +1066,7 @@ mod tests {
         // positive case: it is ok for avro files not to have columns specified
         let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![],
             file_type: "AVRO".to_string(),
             location: "foo.avro".into(),
@@ -1082,7 +1083,7 @@ mod tests {
         let sql =
             "CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 
'foo.parquet'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![],
             file_type: "PARQUET".to_string(),
             location: "foo.parquet".into(),
@@ -1099,7 +1100,7 @@ mod tests {
         let sql =
             "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 
int) LOCATION 'foo.csv'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![
                 make_column_def("c1", DataType::Int(None)),
                 make_column_def("p1", DataType::Int(None)),
@@ -1132,7 +1133,7 @@ mod tests {
         let sql =
             "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 
'blahblah'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![],
             file_type: "X".to_string(),
             location: "blahblah".into(),
@@ -1149,7 +1150,7 @@ mod tests {
         let sql =
             "CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) 
LOCATION 'blahblah'";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![],
             file_type: "X".to_string(),
             location: "blahblah".into(),
@@ -1188,7 +1189,7 @@ mod tests {
         ];
         for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) 
{
             let expected = Statement::CreateExternalTable(CreateExternalTable {
-                name: "t".into(),
+                name: name.clone(),
                 columns: vec![make_column_def("c1", DataType::Int(None))],
                 file_type: "CSV".to_string(),
                 location: "foo.csv".into(),
@@ -1214,7 +1215,7 @@ mod tests {
         let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH 
ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
         let display = None;
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![
                 make_column_def("c1", DataType::Int(display)),
                 make_column_def("c2", DataType::Int(display)),
@@ -1253,7 +1254,7 @@ mod tests {
         let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH 
ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
         let display = None;
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![
                 make_column_def("c1", DataType::Int(display)),
                 make_column_def("c2", DataType::Int(display)),
@@ -1297,7 +1298,7 @@ mod tests {
                      'TRUNCATE' 'NO',
                      'format.has_header' 'true')";
         let expected = Statement::CreateExternalTable(CreateExternalTable {
-            name: "t".into(),
+            name: name.clone(),
             columns: vec![
                 make_column_def("c1", DataType::Int(None)),
                 make_column_def("c2", DataType::Float(None)),
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 895285c597..656d72d07b 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -1239,8 +1239,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         let ordered_exprs =
             self.build_order_by(order_exprs, &df_schema, &mut 
planner_context)?;
 
-        // External tables do not support schemas at the moment, so the name 
is just a table name
-        let name = TableReference::bare(name);
+        let name = self.object_name_to_table_reference(name)?;
         let constraints =
             Constraints::new_from_table_constraints(&all_constraints, 
&df_schema)?;
         Ok(LogicalPlan::Ddl(DdlStatement::CreateExternalTable(
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 5c9655a556..44b591fede 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -1913,6 +1913,13 @@ fn create_external_table_with_pk() {
     quick_test(sql, expected);
 }
 
+#[test]
+fn create_external_table_wih_schema() {
+    let sql = "CREATE EXTERNAL TABLE staging.foo STORED AS CSV LOCATION 
'foo.csv'";
+    let expected = "CreateExternalTable: Partial { schema: \"staging\", table: 
\"foo\" }";
+    quick_test(sql, expected);
+}
+
 #[test]
 fn create_schema_with_quoted_name() {
     let sql = "CREATE SCHEMA \"quoted_schema_name\"";
diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt 
b/datafusion/sqllogictest/test_files/create_external_table.slt
index 12b097c3d5..9ac2ecdce7 100644
--- a/datafusion/sqllogictest/test_files/create_external_table.slt
+++ b/datafusion/sqllogictest/test_files/create_external_table.slt
@@ -275,3 +275,15 @@ DROP TABLE t;
 # query should fail with bad column
 statement error DataFusion error: Error during planning: Column foo is not in 
schema
 CREATE EXTERNAL TABLE t STORED AS parquet LOCATION 
'../../parquet-testing/data/alltypes_plain.parquet' WITH ORDER (foo);
+
+# Create external table with qualified name should belong to the schema
+statement ok
+CREATE SCHEMA staging;
+
+statement ok
+CREATE EXTERNAL TABLE staging.foo STORED AS parquet LOCATION 
'../../parquet-testing/data/alltypes_plain.parquet';
+
+# Create external table with qualified name, but no schema should error
+statement error DataFusion error: Error during planning: failed to resolve 
schema: release
+CREATE EXTERNAL TABLE release.bar STORED AS parquet LOCATION 
'../../parquet-testing/data/alltypes_plain.parquet';
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to