iffyio commented on code in PR #1747:
URL:
https://github.com/apache/datafusion-sqlparser-rs/pull/1747#discussion_r2023368289
##########
src/parser/mod.rs:
##########
@@ -7081,18 +7029,243 @@ impl<'a> Parser<'a> {
if let Token::Word(word) = self.peek_token().token {
if word.keyword == Keyword::OPTIONS {
- options = Some(self.parse_options(Keyword::OPTIONS)?);
+ table_options =
+
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
}
};
}
+ if !dialect_of!(self is HiveDialect) && table_options ==
CreateTableOptions::None {
+ let plain_options = self.parse_plain_options()?;
+ if !plain_options.is_empty() {
+ table_options = CreateTableOptions::Plain(plain_options)
+ }
+ };
+
Ok(CreateTableConfiguration {
partition_by,
cluster_by,
- options,
+ table_options,
})
}
+ fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError>
{
+ // Single parameter option
+ if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+ return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+ }
+
+ // Custom option
+ if self.parse_keywords(&[Keyword::COMMENT]) {
+ let has_eq = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let comment = match (has_eq, value.token) {
+ (true, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+ }
+ (false, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+ }
+ (_, token) => {
+ self.expected("Token::SingleQuotedString",
TokenWithSpan::wrap(token))
+ }
+ };
+ return comment;
+ }
+
+ if self.parse_keywords(&[Keyword::ENGINE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let engine = match value.token {
+ Token::Word(w) => {
+ let parameters = if self.peek_token() == Token::LParen {
+ Some(self.parse_parenthesized_identifiers()?)
+ } else {
+ None
+ };
+
+ Ok(Some(SqlOption::TableEngine(TableEngine {
+ name: w.value,
+ parameters,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return engine;
+ }
+
+ if self.parse_keywords(&[Keyword::TABLESPACE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let tablespace = match value.token {
+ // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE
tablespace_name] STORAGE MEMORY
+ Token::Word(Word { value: name, .. }) |
Token::SingleQuotedString(name) => {
+ let storage = match self.parse_keyword(Keyword::STORAGE) {
+ true => {
+ let _ = self.consume_token(&Token::Eq);
+ let storage_token = self.next_token();
+ match &storage_token.token {
+ Token::Word(w) => match
w.value.to_uppercase().as_str() {
+ "DISK" => Some(StorageType::Disk),
+ "MEMORY" => Some(StorageType::Memory),
+ _ => self
+ .expected("Storage type (DISK or
MEMORY)", storage_token)?,
+ },
+ _ => self.expected("Token::Word",
storage_token)?,
+ }
+ }
+ false => None,
+ };
+
+ Ok(Some(SqlOption::TableSpace(TablespaceOption {
+ name,
+ storage,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return tablespace;
+ }
+
+ if self.parse_keyword(Keyword::UNION) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ match value.token {
+ // UNION [=] (tbl_name[,tbl_name]...)
+ Token::LParen => {
+ let tables: Vec<Ident> =
+ self.parse_comma_separated0(Parser::parse_identifier,
Token::RParen)?;
+ self.expect_token(&Token::RParen)?;
+
+ return Ok(Some(SqlOption::Union(tables)));
+ }
+ _ => {
+ return self.expected("Token::LParen", value)?;
+ }
+ }
+ }
+
+ // Key/Value parameter option
+ let key = if self.parse_keywords(&[Keyword::DEFAULT,
Keyword::CHARSET]) {
Review Comment:
Hmm — I thought it made a lot of sense with the previous version that
delegated parsing the options to the (at least MySQL) dialect, where the
dialect only handles the special cases while the parser handles the generic
cases. The behavior would be self-documenting too. It would be nice to somehow
flag where each parameter is coming from and link to the docs. No need to move
things back to the dialect though if that's a hassle — it could be done as a
follow-up afterwards, so I don't think that part needs to go into this PR.
##########
src/parser/mod.rs:
##########
@@ -7082,18 +7030,243 @@ impl<'a> Parser<'a> {
if let Token::Word(word) = self.peek_token().token {
if word.keyword == Keyword::OPTIONS {
- options = Some(self.parse_options(Keyword::OPTIONS)?);
+ table_options =
+
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
}
};
}
+ if !dialect_of!(self is HiveDialect) && table_options ==
CreateTableOptions::None {
+ let plain_options = self.parse_plain_options()?;
+ if !plain_options.is_empty() {
+ table_options = CreateTableOptions::Plain(plain_options)
+ }
+ };
+
Ok(CreateTableConfiguration {
partition_by,
cluster_by,
- options,
+ table_options,
})
}
+ fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError>
{
+ // Single parameter option
+ if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+ return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+ }
+
+ // Custom option
+ if self.parse_keywords(&[Keyword::COMMENT]) {
+ let has_eq = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let comment = match (has_eq, value.token) {
+ (true, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+ }
+ (false, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+ }
+ (_, token) => {
+ self.expected("Token::SingleQuotedString",
TokenWithSpan::wrap(token))
+ }
+ };
+ return comment;
+ }
+
+ if self.parse_keywords(&[Keyword::ENGINE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let engine = match value.token {
+ Token::Word(w) => {
+ let parameters = if self.peek_token() == Token::LParen {
+ Some(self.parse_parenthesized_identifiers()?)
+ } else {
+ None
+ };
+
+ Ok(Some(SqlOption::TableEngine(TableEngine {
+ name: w.value,
+ parameters,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return engine;
+ }
+
+ if self.parse_keywords(&[Keyword::TABLESPACE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let tablespace = match value.token {
+ // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE
tablespace_name] STORAGE MEMORY
+ Token::Word(Word { value: name, .. }) |
Token::SingleQuotedString(name) => {
+ let storage = match self.parse_keyword(Keyword::STORAGE) {
+ true => {
+ let _ = self.consume_token(&Token::Eq);
+ let storage_token = self.next_token();
+ match &storage_token.token {
+ Token::Word(w) => match
w.value.to_uppercase().as_str() {
+ "DISK" => Some(StorageType::Disk),
+ "MEMORY" => Some(StorageType::Memory),
+ _ => self
+ .expected("Storage type (DISK or
MEMORY)", storage_token)?,
+ },
+ _ => self.expected("Token::Word",
storage_token)?,
+ }
+ }
+ false => None,
+ };
+
+ Ok(Some(SqlOption::TableSpace(TablespaceOption {
+ name,
+ storage,
+ })))
Review Comment:
Thinking along the same lines as `Union`, it would be good to have a reusable
representation for this too, maybe?
```rust
SqlOption::NamedKeyValue {
name: Ident, // TABLESPACE
key: Ident,
value: Expr
}
```
##########
src/parser/mod.rs:
##########
@@ -7081,18 +7029,243 @@ impl<'a> Parser<'a> {
if let Token::Word(word) = self.peek_token().token {
if word.keyword == Keyword::OPTIONS {
- options = Some(self.parse_options(Keyword::OPTIONS)?);
+ table_options =
+
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
}
};
}
+ if !dialect_of!(self is HiveDialect) && table_options ==
CreateTableOptions::None {
+ let plain_options = self.parse_plain_options()?;
+ if !plain_options.is_empty() {
+ table_options = CreateTableOptions::Plain(plain_options)
+ }
+ };
+
Ok(CreateTableConfiguration {
partition_by,
cluster_by,
- options,
+ table_options,
})
}
+ fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError>
{
+ // Single parameter option
+ if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+ return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+ }
+
+ // Custom option
+ if self.parse_keywords(&[Keyword::COMMENT]) {
+ let has_eq = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let comment = match (has_eq, value.token) {
+ (true, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+ }
+ (false, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+ }
+ (_, token) => {
+ self.expected("Token::SingleQuotedString",
TokenWithSpan::wrap(token))
+ }
+ };
+ return comment;
+ }
+
+ if self.parse_keywords(&[Keyword::ENGINE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let engine = match value.token {
+ Token::Word(w) => {
+ let parameters = if self.peek_token() == Token::LParen {
+ Some(self.parse_parenthesized_identifiers()?)
+ } else {
+ None
+ };
+
+ Ok(Some(SqlOption::TableEngine(TableEngine {
+ name: w.value,
+ parameters,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return engine;
+ }
+
+ if self.parse_keywords(&[Keyword::TABLESPACE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let tablespace = match value.token {
+ // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE
tablespace_name] STORAGE MEMORY
+ Token::Word(Word { value: name, .. }) |
Token::SingleQuotedString(name) => {
+ let storage = match self.parse_keyword(Keyword::STORAGE) {
+ true => {
+ let _ = self.consume_token(&Token::Eq);
+ let storage_token = self.next_token();
+ match &storage_token.token {
+ Token::Word(w) => match
w.value.to_uppercase().as_str() {
+ "DISK" => Some(StorageType::Disk),
+ "MEMORY" => Some(StorageType::Memory),
+ _ => self
+ .expected("Storage type (DISK or
MEMORY)", storage_token)?,
+ },
+ _ => self.expected("Token::Word",
storage_token)?,
+ }
+ }
+ false => None,
+ };
+
+ Ok(Some(SqlOption::TableSpace(TablespaceOption {
+ name,
+ storage,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return tablespace;
+ }
+
+ if self.parse_keyword(Keyword::UNION) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ match value.token {
+ // UNION [=] (tbl_name[,tbl_name]...)
+ Token::LParen => {
+ let tables: Vec<Ident> =
+ self.parse_comma_separated0(Parser::parse_identifier,
Token::RParen)?;
+ self.expect_token(&Token::RParen)?;
+
+ return Ok(Some(SqlOption::Union(tables)));
Review Comment:
Instead of calling this variant `Union`, can we make it reusable for future
dialects or similar options? Something like this, maybe:
```rust
SqlOption::NamedParenthesizedList {
name: Ident, // Union, TABLEENGINE
key: Option<Ident>, // table engine name
values: Vec<Expr>, // (val1, val2 ...)
}
```
##########
src/parser/mod.rs:
##########
@@ -7081,18 +7029,243 @@ impl<'a> Parser<'a> {
if let Token::Word(word) = self.peek_token().token {
if word.keyword == Keyword::OPTIONS {
- options = Some(self.parse_options(Keyword::OPTIONS)?);
+ table_options =
+
CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?)
}
};
}
+ if !dialect_of!(self is HiveDialect) && table_options ==
CreateTableOptions::None {
+ let plain_options = self.parse_plain_options()?;
+ if !plain_options.is_empty() {
+ table_options = CreateTableOptions::Plain(plain_options)
+ }
+ };
+
Ok(CreateTableConfiguration {
partition_by,
cluster_by,
- options,
+ table_options,
})
}
+ fn parse_plain_option(&mut self) -> Result<Option<SqlOption>, ParserError>
{
+ // Single parameter option
+ if self.parse_keywords(&[Keyword::START, Keyword::TRANSACTION]) {
+ return Ok(Some(SqlOption::Ident(Ident::new("START TRANSACTION"))));
+ }
+
+ // Custom option
+ if self.parse_keywords(&[Keyword::COMMENT]) {
+ let has_eq = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let comment = match (has_eq, value.token) {
+ (true, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithEq(s))))
+ }
+ (false, Token::SingleQuotedString(s)) => {
+ Ok(Some(SqlOption::Comment(CommentDef::WithoutEq(s))))
+ }
+ (_, token) => {
+ self.expected("Token::SingleQuotedString",
TokenWithSpan::wrap(token))
+ }
+ };
+ return comment;
+ }
+
+ if self.parse_keywords(&[Keyword::ENGINE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let engine = match value.token {
+ Token::Word(w) => {
+ let parameters = if self.peek_token() == Token::LParen {
+ Some(self.parse_parenthesized_identifiers()?)
+ } else {
+ None
+ };
+
+ Ok(Some(SqlOption::TableEngine(TableEngine {
+ name: w.value,
+ parameters,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return engine;
+ }
+
+ if self.parse_keywords(&[Keyword::TABLESPACE]) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ let tablespace = match value.token {
+ // TABLESPACE tablespace_name [STORAGE DISK] | [TABLESPACE
tablespace_name] STORAGE MEMORY
+ Token::Word(Word { value: name, .. }) |
Token::SingleQuotedString(name) => {
+ let storage = match self.parse_keyword(Keyword::STORAGE) {
+ true => {
+ let _ = self.consume_token(&Token::Eq);
+ let storage_token = self.next_token();
+ match &storage_token.token {
+ Token::Word(w) => match
w.value.to_uppercase().as_str() {
+ "DISK" => Some(StorageType::Disk),
+ "MEMORY" => Some(StorageType::Memory),
+ _ => self
+ .expected("Storage type (DISK or
MEMORY)", storage_token)?,
+ },
+ _ => self.expected("Token::Word",
storage_token)?,
+ }
+ }
+ false => None,
+ };
+
+ Ok(Some(SqlOption::TableSpace(TablespaceOption {
+ name,
+ storage,
+ })))
+ }
+ _ => {
+ return self.expected("Token::Word", value)?;
+ }
+ };
+
+ return tablespace;
+ }
+
+ if self.parse_keyword(Keyword::UNION) {
+ let _ = self.consume_token(&Token::Eq);
+ let value = self.next_token();
+
+ match value.token {
+ // UNION [=] (tbl_name[,tbl_name]...)
+ Token::LParen => {
+ let tables: Vec<Ident> =
+ self.parse_comma_separated0(Parser::parse_identifier,
Token::RParen)?;
+ self.expect_token(&Token::RParen)?;
+
+ return Ok(Some(SqlOption::Union(tables)));
+ }
+ _ => {
+ return self.expected("Token::LParen", value)?;
+ }
+ }
+ }
+
+ // Key/Value parameter option
+ let key = if self.parse_keywords(&[Keyword::DEFAULT,
Keyword::CHARSET]) {
+ // [DEFAULT] CHARACTER SET [=] charset_name
+ Ident::new("DEFAULT CHARSET")
+ } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARACTER,
Keyword::SET]) {
+ // [DEFAULT] CHARACTER SET [=] charset_name
+ Ident::new("DEFAULT CHARACTER SET")
+ } else if self.parse_keywords(&[Keyword::DEFAULT, Keyword::COLLATE]) {
+ // [DEFAULT] COLLATE [=] collation_name
+ Ident::new("DEFAULT COLLATE")
+ } else if self.parse_keywords(&[Keyword::DATA, Keyword::DIRECTORY]) {
+ // {DATA | INDEX} DIRECTORY [=] 'absolute path to directory'
+ Ident::new("DATA DIRECTORY")
+ } else if self.parse_keywords(&[Keyword::INDEX, Keyword::DIRECTORY]) {
+ // {DATA | INDEX} DIRECTORY [=] 'absolute path to directory'
+ Ident::new("INDEX DIRECTORY")
+ } else if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) {
+ // [DEFAULT] CHARACTER SET [=] charset_name
+ Ident::new("CHARACTER SET")
+ } else if self.parse_keyword(Keyword::CHARSET) {
+ // [DEFAULT] CHARACTER SET [=] charset_name
+ Ident::new("CHARSET")
+ } else if self.parse_keyword(Keyword::COLLATE) {
+ // [DEFAULT] CHARACTER SET [=] charset_name
+ Ident::new("COLLATE")
+ } else if self.parse_keyword(Keyword::KEY_BLOCK_SIZE) {
+ // KEY_BLOCK_SIZE [=] value
+ Ident::new("KEY_BLOCK_SIZE")
+ } else if self.parse_keyword(Keyword::ROW_FORMAT) {
+ // ROW_FORMAT [=] {DEFAULT | DYNAMIC | FIXED | COMPRESSED |
REDUNDANT | COMPACT}
+ Ident::new("ROW_FORMAT")
+ } else if self.parse_keyword(Keyword::PACK_KEYS) {
+ // PACK_KEYS [=] {0 | 1 | DEFAULT}
+ Ident::new("PACK_KEYS")
+ } else if self.parse_keyword(Keyword::STATS_AUTO_RECALC) {
+ // STATS_AUTO_RECALC [=] {DEFAULT | 0 | 1}
+ Ident::new("STATS_AUTO_RECALC")
+ } else if self.parse_keyword(Keyword::STATS_PERSISTENT) {
+ //STATS_PERSISTENT [=] {DEFAULT | 0 | 1}
+ Ident::new("STATS_PERSISTENT")
+ } else if self.parse_keyword(Keyword::STATS_SAMPLE_PAGES) {
+ // STATS_SAMPLE_PAGES [=] value
+ Ident::new("STATS_SAMPLE_PAGES")
+ } else if self.parse_keyword(Keyword::DELAY_KEY_WRITE) {
+ // DELAY_KEY_WRITE [=] {0 | 1}
+ Ident::new("DELAY_KEY_WRITE")
+ } else if self.parse_keyword(Keyword::COMPRESSION) {
+ // COMPRESSION [=] {'ZLIB' | 'LZ4' | 'NONE'}
+ Ident::new("COMPRESSION")
+ } else if self.parse_keyword(Keyword::ENCRYPTION) {
+ // ENCRYPTION [=] {'Y' | 'N'}
+ Ident::new("ENCRYPTION")
+ } else if self.parse_keyword(Keyword::MAX_ROWS) {
+ // MAX_ROWS [=] value
+ Ident::new("MAX_ROWS")
+ } else if self.parse_keyword(Keyword::MIN_ROWS) {
+ // MIN_ROWS [=] value
+ Ident::new("MIN_ROWS")
+ } else if self.parse_keyword(Keyword::AUTOEXTEND_SIZE) {
+ // AUTOEXTEND_SIZE [=] value
+ Ident::new("AUTOEXTEND_SIZE")
+ } else if self.parse_keyword(Keyword::AVG_ROW_LENGTH) {
+ // AVG_ROW_LENGTH [=] value
+ Ident::new("AVG_ROW_LENGTH")
+ } else if self.parse_keyword(Keyword::CHECKSUM) {
+ // CHECKSUM [=] {0 | 1}
+ Ident::new("CHECKSUM")
+ } else if self.parse_keyword(Keyword::CONNECTION) {
+ // CONNECTION [=] 'connect_string'
+ Ident::new("CONNECTION")
+ } else if self.parse_keyword(Keyword::ENGINE_ATTRIBUTE) {
+ // ENGINE_ATTRIBUTE [=] 'string'
+ Ident::new("ENGINE_ATTRIBUTE")
+ } else if self.parse_keyword(Keyword::PASSWORD) {
+ // PASSWORD [=] 'string'
+ Ident::new("PASSWORD")
+ } else if self.parse_keyword(Keyword::SECONDARY_ENGINE_ATTRIBUTE) {
+ // SECONDARY_ENGINE_ATTRIBUTE [=] 'string'
+ Ident::new("SECONDARY_ENGINE_ATTRIBUTE")
+ } else if self.parse_keyword(Keyword::INSERT_METHOD) {
+ // INSERT_METHOD [=] { NO | FIRST | LAST }
+ Ident::new("INSERT_METHOD")
+ } else if self.parse_keyword(Keyword::AUTO_INCREMENT) {
+ Ident::new("AUTO_INCREMENT")
Review Comment:
Ah — so for this part, I think these can be handled transparently; it's what I
meant by this section in my previous comment:
```rust
let _ = self.consume_token('=')?;
let value = self.parse_expr()?;
Ok(Some(SqlOption::KeyValue {
key, value
}))
```
in that the ideal scenario avoids enumerating the various options where
possible
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]