This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ece59a278a create table default to null (#5606)
ece59a278a is described below
commit ece59a278a72703c237174899c46761309903418
Author: Alex Huang <[email protected]>
AuthorDate: Wed Mar 22 22:29:56 2023 +0100
create table default to null (#5606)
* create table default to null
* allow build_schema to make null schema
* remove create_table
* make variable readable
* refactor run_query because of new create table statement
* remove useless statement
---
.../src/engines/datafusion/create_table.rs | 80 ------------------
.../sqllogictests/src/engines/datafusion/mod.rs | 33 +-------
.../core/tests/sqllogictests/test_files/ddl.slt | 16 +++-
.../tests/sqllogictests/test_files/describe.slt | 12 +--
datafusion/sql/src/planner.rs | 6 +-
datafusion/sql/src/statement.rs | 96 +++++++++++++---------
6 files changed, 85 insertions(+), 158 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/src/engines/datafusion/create_table.rs b/datafusion/core/tests/sqllogictests/src/engines/datafusion/create_table.rs
deleted file mode 100644
index 2d2e37f4b7..0000000000
--- a/datafusion/core/tests/sqllogictests/src/engines/datafusion/create_table.rs
+++ /dev/null
@@ -1,80 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use super::error::Result;
-use crate::engines::datafusion::error::DFSqlLogicTestError;
-use crate::engines::datafusion::util::LogicTestContextProvider;
-use crate::engines::output::DFOutput;
-use datafusion::datasource::MemTable;
-use datafusion::prelude::SessionContext;
-use datafusion_common::{DataFusionError, OwnedTableReference};
-use datafusion_sql::planner::{object_name_to_table_reference, ParserOptions, SqlToRel};
-use sqllogictest::DBOutput;
-use sqlparser::ast::{ColumnDef, ObjectName};
-use std::sync::Arc;
-
-pub async fn create_table(
- ctx: &SessionContext,
- name: ObjectName,
- columns: Vec<ColumnDef>,
- if_not_exists: bool,
- or_replace: bool,
-) -> Result<DFOutput> {
- let table_reference =
- object_name_to_table_reference(name, ctx.enable_ident_normalization())?;
- let existing_table = ctx.table(&table_reference).await;
- match (if_not_exists, or_replace, existing_table) {
- (true, false, Ok(_)) => Ok(DBOutput::StatementComplete(0)),
- (false, true, Ok(_)) => {
- ctx.deregister_table(&table_reference)?;
- create_new_table(ctx, table_reference, columns)
- }
- (true, true, Ok(_)) => {
- Err(DFSqlLogicTestError::from(DataFusionError::Execution(
- "'IF NOT EXISTS' cannot coexist with 'REPLACE'".to_string(),
- )))
- }
- (_, _, Err(_)) => create_new_table(ctx, table_reference, columns),
- (false, false, Ok(_)) => {
- Err(DFSqlLogicTestError::from(DataFusionError::Execution(
- format!("Table '{table_reference}' already exists"),
- )))
- }
- }
-}
-
-fn create_new_table(
- ctx: &SessionContext,
- table_reference: OwnedTableReference,
- columns: Vec<ColumnDef>,
-) -> Result<DFOutput> {
- let config = ctx.copied_config();
- let sql_to_rel = SqlToRel::new_with_options(
- &LogicTestContextProvider {},
- ParserOptions {
- parse_float_as_decimal: config.options().sql_parser.parse_float_as_decimal,
- enable_ident_normalization: config
- .options()
- .sql_parser
- .enable_ident_normalization,
- },
- );
- let schema = Arc::new(sql_to_rel.build_schema(columns)?);
- let table_provider = Arc::new(MemTable::try_new(schema, vec![])?);
- ctx.register_table(&table_reference, table_provider)?;
- Ok(DBOutput::StatementComplete(0))
-}
diff --git a/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs b/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs
index cdd6663a5e..c87f0048ce 100644
--- a/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs
+++ b/datafusion/core/tests/sqllogictests/src/engines/datafusion/mod.rs
@@ -22,14 +22,11 @@ use crate::engines::output::{DFColumnType, DFOutput};
use self::error::{DFSqlLogicTestError, Result};
use async_trait::async_trait;
-use create_table::create_table;
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::prelude::SessionContext;
-use datafusion_sql::parser::{DFParser, Statement};
+use datafusion_sql::parser::DFParser;
use sqllogictest::DBOutput;
-use sqlparser::ast::Statement as SQLStatement;
-mod create_table;
mod error;
mod normalize;
mod util;
@@ -77,33 +74,9 @@ impl sqllogictest::AsyncDB for DataFusion {
async fn run_query(ctx: &SessionContext, sql: impl Into<String>) -> Result<DFOutput> {
let sql = sql.into();
- // Check if the sql is `insert`
+ // check if the sql is more than one statement
if let Ok(mut statements) = DFParser::parse_sql(&sql) {
- let statement0 = statements.pop_front().expect("at least one SQL statement");
- if let Statement::Statement(statement) = statement0 {
- let statement = *statement;
- match statement {
- SQLStatement::CreateTable {
- query,
- constraints,
- table_properties,
- with_options,
- name,
- columns,
- if_not_exists,
- or_replace,
- ..
- } if query.is_none()
- && constraints.is_empty()
- && table_properties.is_empty()
- && with_options.is_empty() =>
- {
- return create_table(ctx, name, columns, if_not_exists, or_replace)
- .await
- }
- _ => {}
- };
- }
+ statements.pop_front().expect("at least one SQL statement");
}
let df = ctx.sql(sql.as_str()).await?;
diff --git a/datafusion/core/tests/sqllogictests/test_files/ddl.slt b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
index a4c90df072..c012ae021f 100644
--- a/datafusion/core/tests/sqllogictests/test_files/ddl.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/ddl.slt
@@ -501,8 +501,8 @@ datafusion public table_with_normalization NULL
query TTT
describe table_with_normalization
----
-field1 Int64 NO
-field2 Int64 NO
+field1 Int64 YES
+field2 Int64 YES
# Disable ident normalization
statement ok
@@ -553,3 +553,15 @@ set datafusion.sql_parser.parse_float_as_decimal = false;
statement ok
set datafusion.sql_parser.enable_ident_normalization = true;
+
+
+statement ok
+create table foo(x int);
+
+statement ok
+insert into foo values (null);
+
+query I
+select * from foo;
+----
+NULL
\ No newline at end of file
diff --git a/datafusion/core/tests/sqllogictests/test_files/describe.slt b/datafusion/core/tests/sqllogictests/test_files/describe.slt
index 9862f65806..5ee4d1cd21 100644
--- a/datafusion/core/tests/sqllogictests/test_files/describe.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/describe.slt
@@ -29,9 +29,9 @@ CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as
query TTT rowsort
DESCRIBE aggregate_simple;
----
-c1 Float32 NO
-c2 Float64 NO
-c3 Boolean NO
+c1 Float32 YES
+c2 Float64 YES
+c3 Boolean YES
statement ok
DROP TABLE aggregate_simple;
@@ -49,9 +49,9 @@ CREATE external table aggregate_simple(c1 real, c2 double, c3 boolean) STORED as
query TTT rowsort
DESCRIBE aggregate_simple;
----
-c1 Float32 NO
-c2 Float64 NO
-c3 Boolean NO
+c1 Float32 YES
+c2 Float64 YES
+c3 Boolean YES
statement ok
DROP TABLE aggregate_simple;
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 87c090e3ee..9156638f44 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -137,14 +137,14 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
for column in columns {
let data_type = self.convert_simple_data_type(&column.data_type)?;
- let allow_null = column
+ let not_nullable = column
.options
.iter()
- .any(|x| x.option == ColumnOption::Null);
+ .any(|x| x.option == ColumnOption::NotNull);
fields.push(Field::new(
normalize_ident(column.name,
self.options.enable_ident_normalization),
data_type,
- allow_null,
+ !not_nullable,
));
}
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index 5f927c701e..a770ee725f 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -35,8 +35,9 @@ use datafusion_expr::utils::expr_to_columns;
use datafusion_expr::{
cast, col, CreateCatalog, CreateCatalogSchema,
CreateExternalTable as PlanCreateExternalTable, CreateMemoryTable,
CreateView,
- DescribeTable, DmlStatement, DropTable, DropView, Explain, ExprSchemable,
Filter,
- LogicalPlan, LogicalPlanBuilder, PlanType, SetVariable, ToStringifiedPlan,
WriteOp,
+ DescribeTable, DmlStatement, DropTable, DropView, EmptyRelation, Explain,
+ ExprSchemable, Filter, LogicalPlan, LogicalPlanBuilder, PlanType,
SetVariable,
+ ToStringifiedPlan, WriteOp,
};
use sqlparser::ast;
use sqlparser::ast::{
@@ -44,6 +45,7 @@ use sqlparser::ast::{
SchemaName, SetExpr, ShowCreateObject, ShowStatementFilter, Statement,
TableFactor,
TableWithJoins, UnaryOperator, Value,
};
+
use sqlparser::parser::ParserError::ParserError;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::sync::Arc;
@@ -114,7 +116,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
} => self.set_variable_to_plan(local, hivevar, &variable, value),
Statement::CreateTable {
- query: Some(query),
+ query,
name,
columns,
constraints,
@@ -127,42 +129,66 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
&& table_properties.is_empty()
&& with_options.is_empty() =>
{
- let plan = self.query_to_plan(*query, planner_context)?;
- let input_schema = plan.schema();
-
- let plan = if !columns.is_empty() {
- let schema = self.build_schema(columns)?.to_dfschema_ref()?;
- if schema.fields().len() != input_schema.fields().len() {
- return Err(DataFusionError::Plan(format!(
+ match query {
+ Some(query) => {
+ let plan = self.query_to_plan(*query,
planner_context)?;
+ let input_schema = plan.schema();
+
+ let plan = if !columns.is_empty() {
+ let schema =
self.build_schema(columns)?.to_dfschema_ref()?;
+ if schema.fields().len() !=
input_schema.fields().len() {
+ return Err(DataFusionError::Plan(format!(
"Mismatch: {} columns specified, but result has {} columns",
schema.fields().len(),
input_schema.fields().len()
)));
+ }
+ let input_fields = input_schema.fields();
+ let project_exprs = schema
+ .fields()
+ .iter()
+ .zip(input_fields)
+ .map(|(field, input_field)| {
+ cast(
+ col(input_field.name()),
+ field.data_type().clone(),
+ )
+ .alias(field.name())
+ })
+ .collect::<Vec<_>>();
+ LogicalPlanBuilder::from(plan.clone())
+ .project(project_exprs)?
+ .build()?
+ } else {
+ plan
+ };
+
+ Ok(LogicalPlan::CreateMemoryTable(CreateMemoryTable {
+ name: self.object_name_to_table_reference(name)?,
+ input: Arc::new(plan),
+ if_not_exists,
+ or_replace,
+ }))
}
- let input_fields = input_schema.fields();
- let project_exprs = schema
- .fields()
- .iter()
- .zip(input_fields)
- .map(|(field, input_field)| {
- cast(col(input_field.name()),
field.data_type().clone())
- .alias(field.name())
- })
- .collect::<Vec<_>>();
- LogicalPlanBuilder::from(plan.clone())
- .project(project_exprs)?
- .build()?
- } else {
- plan
- };
-
- Ok(LogicalPlan::CreateMemoryTable(CreateMemoryTable {
- name: self.object_name_to_table_reference(name)?,
- input: Arc::new(plan),
- if_not_exists,
- or_replace,
- }))
+
+ None => {
+ let schema =
self.build_schema(columns)?.to_dfschema_ref()?;
+ let plan = EmptyRelation {
+ produce_one_row: false,
+ schema,
+ };
+ let plan = LogicalPlan::EmptyRelation(plan);
+
+ Ok(LogicalPlan::CreateMemoryTable(CreateMemoryTable {
+ name: self.object_name_to_table_reference(name)?,
+ input: Arc::new(plan),
+ if_not_exists,
+ or_replace,
+ }))
+ }
+ }
}
+
Statement::CreateView {
or_replace,
name,
@@ -181,10 +207,6 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
definition: sql,
}))
}
- Statement::CreateTable { .. } => Err(DataFusionError::NotImplemented(
- "Only `CREATE TABLE table_name AS SELECT ...` statement is supported"
- .to_string(),
- )),
Statement::ShowCreate { obj_type, obj_name } => match obj_type {
ShowCreateObject::Table =>
self.show_create_table_to_plan(obj_name),
_ => Err(DataFusionError::NotImplemented(