iffyio commented on code in PR #1939: URL: https://github.com/apache/datafusion-sqlparser-rs/pull/1939#discussion_r2206923615
########## src/dialect/snowflake.rs: ########## @@ -731,6 +734,115 @@ pub fn parse_create_table( Ok(builder.build()) } +/// Parse snowflake create database statement. +/// <https://docs.snowflake.com/en/sql-reference/sql/create-database> +pub fn parse_create_database( + or_replace: bool, + transient: bool, + parser: &mut Parser, +) -> Result<Statement, ParserError> { + let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = parser.parse_object_name(false)?; + + let mut builder = CreateDatabaseBuilder::new(name) + .or_replace(or_replace) + .transient(transient) + .if_not_exists(if_not_exists); + + loop { + let next_token = parser.next_token(); + match &next_token.token { + Token::Word(word) => match word.keyword { + Keyword::CLONE => { + builder = builder.clone_clause(Some(parser.parse_object_name(false)?)); + } + Keyword::DATA_RETENTION_TIME_IN_DAYS => { + parser.expect_token(&Token::Eq)?; + builder = + builder.data_retention_time_in_days(Some(parser.parse_literal_uint()?)); + } + Keyword::MAX_DATA_EXTENSION_TIME_IN_DAYS => { + parser.expect_token(&Token::Eq)?; + builder = + builder.max_data_extension_time_in_days(Some(parser.parse_literal_uint()?)); + } + Keyword::EXTERNAL_VOLUME => { + parser.expect_token(&Token::Eq)?; + builder = builder.external_volume(Some(parser.parse_literal_string()?)); + } + Keyword::CATALOG => { + parser.expect_token(&Token::Eq)?; + builder = builder.catalog(Some(parser.parse_literal_string()?)); + } + Keyword::REPLACE_INVALID_CHARACTERS => { + parser.expect_token(&Token::Eq)?; + builder = + builder.replace_invalid_characters(Some(parser.parse_boolean_string()?)); + } + Keyword::DEFAULT_DDL_COLLATION => { + parser.expect_token(&Token::Eq)?; + builder = builder.default_ddl_collation(Some(parser.parse_literal_string()?)); + } + Keyword::STORAGE_SERIALIZATION_POLICY => { + parser.expect_token(&Token::Eq)?; + let policy = parse_storage_serialization_policy(parser)?; + builder = builder.storage_serialization_policy(Some(policy)); + } + Keyword::COMMENT => { + parser.expect_token(&Token::Eq)?; + builder = builder.comment(Some(parser.parse_literal_string()?)); + } + Keyword::CATALOG_SYNC => { + parser.expect_token(&Token::Eq)?; + builder = builder.catalog_sync(Some(parser.parse_literal_string()?)); + } + Keyword::CATALOG_SYNC_NAMESPACE_FLATTEN_DELIMITER => { + parser.expect_token(&Token::Eq)?; + builder = builder.catalog_sync_namespace_flatten_delimiter(Some( + parser.parse_literal_string()?, + )); + } + Keyword::CATALOG_SYNC_NAMESPACE_MODE => { + parser.expect_token(&Token::Eq)?; + let mode = + match parser.parse_one_of_keywords(&[Keyword::NEST, Keyword::FLATTEN]) { + Some(Keyword::NEST) => CatalogSyncNamespaceMode::Nest, + Some(Keyword::FLATTEN) => CatalogSyncNamespaceMode::Flatten, + _ => { + return parser.expected("NEST or FLATTEN", next_token); + } + }; + builder = builder.catalog_sync_namespace_mode(Some(mode)); + } + Keyword::WITH => { + if parser.parse_keyword(Keyword::TAG) { + parser.expect_token(&Token::LParen)?; + let tags = parser.parse_comma_separated(Parser::parse_tag)?; + parser.expect_token(&Token::RParen)?; + builder = builder.with_tags(Some(tags)); + } else if parser.parse_keyword(Keyword::CONTACT) { + parser.expect_token(&Token::LParen)?; + let contacts = parser.parse_comma_separated(|p| { + let purpose = p.parse_identifier()?.value; + p.expect_token(&Token::Eq)?; + let contact = p.parse_identifier()?.value; + Ok(ContactEntry::new(purpose, contact)) + })?; + parser.expect_token(&Token::RParen)?; + builder = builder.with_contacts(Some(contacts)); + } else { + return parser.expected("TAG or CONTACT", next_token); + } + } + _ => return parser.expected("end of statementrrr", next_token), Review Comment: ```suggestion _ => return parser.expected("end of statement", next_token), ``` ########## src/ast/ddl.rs: ########## @@ -2524,3 +2524,104 @@ impl fmt::Display for CreateConnector { Ok(()) } } + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreateSnowflakeDatabase { Review Comment: Can we reuse [this existing representation](https://github.com/apache/datafusion-sqlparser-rs/blob/7654a49b50aa0d05475901fc145bdf1f8c14dfff/src/ast/mod.rs#L3853-L3857) of the `CREATE DATABASE` statement? essentially adding the snowflake parameters to it. We try to avoid dialect specific representations in the AST in general. Since we're looking to add quite a few options, it might be that we would ideally need to first convert the current `Statement::CreateDatabase{...}` away from the [anonymous struct syntax](https://github.com/apache/datafusion-sqlparser-rs/issues/1204) towards an explicit struct e.g. ```rust Statement::CreateDatabase(CreateDatabase{...}) ``` ########## src/ast/mod.rs: ########## @@ -3857,6 +3857,31 @@ pub enum Statement { managed_location: Option<String>, }, /// ```sql + /// CREATE [ OR REPLACE ] [ TRANSIENT ] DATABASE [ IF NOT EXISTS ] <name> + /// [ CLONE <source_schema> + /// [ { AT | BEFORE } ( { TIMESTAMP => <timestamp> | OFFSET => <time_difference> | STATEMENT => <id> } ) ] + /// [ IGNORE TABLES WITH INSUFFICIENT DATA RETENTION ] + /// [ IGNORE HYBRID TABLES ] ] + /// [ DATA_RETENTION_TIME_IN_DAYS = <integer> ] + /// [ MAX_DATA_EXTENSION_TIME_IN_DAYS = <integer> ] + /// [ EXTERNAL_VOLUME = <external_volume_name> ] + /// [ CATALOG = <catalog_integration_name> ] + /// [ REPLACE_INVALID_CHARACTERS = { TRUE | FALSE } ] + /// [ DEFAULT_DDL_COLLATION = '<collation_specification>' ] + /// [ STORAGE_SERIALIZATION_POLICY = { COMPATIBLE | OPTIMIZED } ] + /// [ COMMENT = '<string_literal>' ] + /// [ CATALOG_SYNC = '<snowflake_open_catalog_integration_name>' ] + /// [ CATALOG_SYNC_NAMESPACE_MODE = { NEST | FLATTEN } ] + /// [ CATALOG_SYNC_NAMESPACE_FLATTEN_DELIMITER = '<string_literal>' ] + /// [ [ WITH ] TAG ( <tag_name> = '<tag_value>' [ , <tag_name> = '<tag_value>' , ... ] ) ] + /// [ WITH CONTACT ( <purpose> = <contact_name> [ , <purpose> = <contact_name> ... ] ) ] Review Comment: Re documentation, i think using a simple example or only mentioning that it represents the `CREATE DATABASE` statement would suffice, the actual spec we can delegate to the linked documentation for each dialect. ########## tests/sqlparser_snowflake.rs: ########## @@ -4438,3 +4438,63 @@ fn test_snowflake_identifier_function() { true ); } + +#[test] Review Comment: Can we merge all the added tests into the same `fn test_create_database()` function? since they are part of the same feature ########## tests/sqlparser_snowflake.rs: ########## @@ -4438,3 +4438,63 @@ fn test_snowflake_identifier_function() { true ); } + +#[test] +fn test_create_database_basic() { + snowflake().verified_stmt("CREATE DATABASE my_db"); + snowflake().verified_stmt("CREATE OR REPLACE DATABASE my_db"); + snowflake().verified_stmt("CREATE TRANSIENT DATABASE IF NOT EXISTS my_db"); +} + +#[test] +fn test_create_database_clone() { + snowflake().verified_stmt("CREATE DATABASE my_db CLONE src_db"); + snowflake().verified_stmt( + "CREATE OR REPLACE DATABASE my_db CLONE src_db DATA_RETENTION_TIME_IN_DAYS = 1", + ); +} + +#[test] +fn test_create_database_with_all_options() { + snowflake().one_statement_parses_to( + r#" + CREATE OR REPLACE TRANSIENT DATABASE IF NOT EXISTS my_db + CLONE src_db + DATA_RETENTION_TIME_IN_DAYS = 1 + MAX_DATA_EXTENSION_TIME_IN_DAYS = 5 + EXTERNAL_VOLUME = 'volume1' + CATALOG = 'my_catalog' + REPLACE_INVALID_CHARACTERS = TRUE + DEFAULT_DDL_COLLATION = 'en-ci' + STORAGE_SERIALIZATION_POLICY = COMPATIBLE + COMMENT = 'This is my database' + CATALOG_SYNC = 'sync_integration' + CATALOG_SYNC_NAMESPACE_FLATTEN_DELIMITER = '/' Review Comment: Can we add an example for `CATALOG_SYNC_NAMESPACE_MODE`? ########## src/ast/mod.rs: ########## @@ -9524,6 +9550,29 @@ impl Display for Tag { } } +/// Snowflake `WITH CONTACT ( purpose = contact [ , purpose = contact ...] )` +/// +/// <https://docs.snowflake.com/en/sql-reference/sql/create-database> +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ContactEntry { + pub purpose: String, + pub contact: String, +} + +impl ContactEntry { + pub fn new(purpose: String, contact: String) -> Self { + Self { purpose, contact } + } +} Review Comment: ```suggestion ``` I think we can remove this in order to keep the public API surface smaller -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org For additional commands, e-mail: github-h...@datafusion.apache.org