This is an automated email from the ASF dual-hosted git repository.

iffyio pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-sqlparser-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 7867ba3c Redshift: Fix parsing for quoted numbered columns (#1576)
7867ba3c is described below

commit 7867ba3cf04c9c8324bfa26403945f0d53c2119a
Author: Aleksei Piianin <[email protected]>
AuthorDate: Sun Dec 15 10:56:11 2024 +0100

    Redshift: Fix parsing for quoted numbered columns (#1576)
---
 src/dialect/mod.rs          | 53 ++++++++++++++++++++++++-------
 src/dialect/redshift.rs     | 48 ++++++++++++++++++++++------
 src/tokenizer.rs            | 77 +++++++++++++++++++++++++++++++++++++++------
 tests/sqlparser_redshift.rs | 58 +++++++++++++++++++++++++++++++---
 4 files changed, 200 insertions(+), 36 deletions(-)

diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs
index 8cce6a35..c32b763a 100644
--- a/src/dialect/mod.rs
+++ b/src/dialect/mod.rs
@@ -128,14 +128,39 @@ pub trait Dialect: Debug + Any {
         ch == '"' || ch == '`'
     }
 
-    /// Return the character used to quote identifiers.
-    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+    /// Determine if a character starts a potential nested quoted identifier.
+    /// Example: RedShift supports the following quote styles to all mean the 
same thing:
+    /// ```sql
+    /// SELECT 1 AS foo;
+    /// SELECT 1 AS "foo";
+    /// SELECT 1 AS [foo];
+    /// SELECT 1 AS ["foo"];
+    /// ```
+    fn is_nested_delimited_identifier_start(&self, _ch: char) -> bool {
+        false
+    }
+
+    /// Only applicable whenever 
[`Self::is_nested_delimited_identifier_start`] returns true
+    /// If the next sequence of tokens potentially represent a nested 
identifier, then this method
+    /// returns a tuple containing the outer quote style, and if present, the 
inner (nested) quote style.
+    ///
+    /// Example (Redshift):
+    /// ```text
+    /// `["foo"]` => Some(`[`, Some(`"`))
+    /// `[foo]` => Some(`[`, None)
+    /// `[0]` => None
+    /// `"foo"` => None
+    /// ```
+    fn peek_nested_delimited_identifier_quotes(
+        &self,
+        mut _chars: Peekable<Chars<'_>>,
+    ) -> Option<(char, Option<char>)> {
         None
     }
 
-    /// Determine if quoted characters are proper for identifier
-    fn is_proper_identifier_inside_quotes(&self, mut _chars: 
Peekable<Chars<'_>>) -> bool {
-        true
+    /// Return the character used to quote identifiers.
+    fn identifier_quote_style(&self, _identifier: &str) -> Option<char> {
+        None
     }
 
     /// Determine if a character is a valid start character for an unquoted 
identifier
@@ -869,6 +894,17 @@ mod tests {
                 self.0.is_delimited_identifier_start(ch)
             }
 
+            fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
+                self.0.is_nested_delimited_identifier_start(ch)
+            }
+
+            fn peek_nested_delimited_identifier_quotes(
+                &self,
+                chars: std::iter::Peekable<std::str::Chars<'_>>,
+            ) -> Option<(char, Option<char>)> {
+                self.0.peek_nested_delimited_identifier_quotes(chars)
+            }
+
             fn identifier_quote_style(&self, identifier: &str) -> Option<char> 
{
                 self.0.identifier_quote_style(identifier)
             }
@@ -877,13 +913,6 @@ mod tests {
                 self.0.supports_string_literal_backslash_escape()
             }
 
-            fn is_proper_identifier_inside_quotes(
-                &self,
-                chars: std::iter::Peekable<std::str::Chars<'_>>,
-            ) -> bool {
-                self.0.is_proper_identifier_inside_quotes(chars)
-            }
-
             fn supports_filter_during_aggregation(&self) -> bool {
                 self.0.supports_filter_during_aggregation()
             }
diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs
index 48eb00ab..55405ba5 100644
--- a/src/dialect/redshift.rs
+++ b/src/dialect/redshift.rs
@@ -32,21 +32,51 @@ pub struct RedshiftSqlDialect {}
 // in the Postgres dialect, the query will be parsed as an array, while in the 
Redshift dialect it will
 // be a json path
 impl Dialect for RedshiftSqlDialect {
-    fn is_delimited_identifier_start(&self, ch: char) -> bool {
-        ch == '"' || ch == '['
+    /// Determine if a character starts a potential nested quoted identifier.
+    /// Example: RedShift supports the following quote styles to all mean the 
same thing:
+    /// ```sql
+    /// SELECT 1 AS foo;
+    /// SELECT 1 AS "foo";
+    /// SELECT 1 AS [foo];
+    /// SELECT 1 AS ["foo"];
+    /// ```
+    fn is_nested_delimited_identifier_start(&self, ch: char) -> bool {
+        ch == '['
     }
 
-    /// Determine if quoted characters are proper for identifier
-    /// It's needed to distinguish treating square brackets as quotes from
-    /// treating them as json path. If there is identifier then we assume
-    /// there is no json path.
-    fn is_proper_identifier_inside_quotes(&self, mut chars: 
Peekable<Chars<'_>>) -> bool {
+    /// Only applicable whenever 
[`Self::is_nested_delimited_identifier_start`] returns true
+    /// If the next sequence of tokens potentially represent a nested 
identifier, then this method
+    /// returns a tuple containing the outer quote style, and if present, the 
inner (nested) quote style.
+    ///
+    /// Example (Redshift):
+    /// ```text
+    /// `["foo"]` => Some(`[`, Some(`"`))
+    /// `[foo]` => Some(`[`, None)
+    /// `[0]` => None
+    /// `"foo"` => None
+    /// ```
+    fn peek_nested_delimited_identifier_quotes(
+        &self,
+        mut chars: Peekable<Chars<'_>>,
+    ) -> Option<(char, Option<char>)> {
+        if chars.peek() != Some(&'[') {
+            return None;
+        }
+
         chars.next();
+
         let mut not_white_chars = chars.skip_while(|ch| 
ch.is_whitespace()).peekable();
+
         if let Some(&ch) = not_white_chars.peek() {
-            return self.is_identifier_start(ch);
+            if ch == '"' {
+                return Some(('[', Some('"')));
+            }
+            if self.is_identifier_start(ch) {
+                return Some(('[', None));
+            }
         }
-        false
+
+        None
     }
 
     fn is_identifier_start(&self, ch: char) -> bool {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index aacfc16f..9269f4fe 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1075,25 +1075,61 @@ impl<'a> Tokenizer<'a> {
                     Ok(Some(Token::DoubleQuotedString(s)))
                 }
                 // delimited (quoted) identifier
+                quote_start if self.dialect.is_delimited_identifier_start(ch) 
=> {
+                    let word = self.tokenize_quoted_identifier(quote_start, 
chars)?;
+                    Ok(Some(Token::make_word(&word, Some(quote_start))))
+                }
+                // Potentially nested delimited (quoted) identifier
                 quote_start
-                    if self.dialect.is_delimited_identifier_start(ch)
+                    if self
+                        .dialect
+                        .is_nested_delimited_identifier_start(quote_start)
                         && self
                             .dialect
-                            
.is_proper_identifier_inside_quotes(chars.peekable.clone()) =>
+                            
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
+                            .is_some() =>
                 {
-                    let error_loc = chars.location();
-                    chars.next(); // consume the opening quote
+                    let Some((quote_start, nested_quote_start)) = self
+                        .dialect
+                        
.peek_nested_delimited_identifier_quotes(chars.peekable.clone())
+                    else {
+                        return self.tokenizer_error(
+                            chars.location(),
+                            format!("Expected nested delimiter '{quote_start}' 
before EOF."),
+                        );
+                    };
+
+                    let Some(nested_quote_start) = nested_quote_start else {
+                        let word = 
self.tokenize_quoted_identifier(quote_start, chars)?;
+                        return Ok(Some(Token::make_word(&word, 
Some(quote_start))));
+                    };
+
+                    let mut word = vec![];
                     let quote_end = Word::matching_end_quote(quote_start);
-                    let (s, last_char) = self.parse_quoted_ident(chars, 
quote_end);
+                    let nested_quote_end = 
Word::matching_end_quote(nested_quote_start);
+                    let error_loc = chars.location();
 
-                    if last_char == Some(quote_end) {
-                        Ok(Some(Token::make_word(&s, Some(quote_start))))
-                    } else {
-                        self.tokenizer_error(
+                    chars.next(); // skip the first delimiter
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+                    if chars.peek() != Some(&nested_quote_start) {
+                        return self.tokenizer_error(
+                            error_loc,
+                            format!("Expected nested delimiter 
'{nested_quote_start}' before EOF."),
+                        );
+                    }
+                    word.push(nested_quote_start.into());
+                    
word.push(self.tokenize_quoted_identifier(nested_quote_end, chars)?);
+                    word.push(nested_quote_end.into());
+                    peeking_take_while(chars, |ch| ch.is_whitespace());
+                    if chars.peek() != Some(&quote_end) {
+                        return self.tokenizer_error(
                             error_loc,
                             format!("Expected close delimiter '{quote_end}' 
before EOF."),
-                        )
+                        );
                     }
+                    chars.next(); // skip close delimiter
+
+                    Ok(Some(Token::make_word(&word.concat(), 
Some(quote_start))))
                 }
                 // numbers and period
                 '0'..='9' | '.' => {
@@ -1597,6 +1633,27 @@ impl<'a> Tokenizer<'a> {
         s
     }
 
+    /// Read a quoted identifier
+    fn tokenize_quoted_identifier(
+        &self,
+        quote_start: char,
+        chars: &mut State,
+    ) -> Result<String, TokenizerError> {
+        let error_loc = chars.location();
+        chars.next(); // consume the opening quote
+        let quote_end = Word::matching_end_quote(quote_start);
+        let (s, last_char) = self.parse_quoted_ident(chars, quote_end);
+
+        if last_char == Some(quote_end) {
+            Ok(s)
+        } else {
+            self.tokenizer_error(
+                error_loc,
+                format!("Expected close delimiter '{quote_end}' before EOF."),
+            )
+        }
+    }
+
     /// Read a single quoted string, starting with the opening quote.
     fn tokenize_escaped_single_quoted_string(
         &self,
diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs
index 9492946d..857d378b 100644
--- a/tests/sqlparser_redshift.rs
+++ b/tests/sqlparser_redshift.rs
@@ -157,6 +157,8 @@ fn parse_delimited_identifiers() {
     }
 
     redshift().verified_stmt(r#"CREATE TABLE "foo" ("bar" "int")"#);
+    // An alias starting with a number
+    redshift().verified_stmt(r#"CREATE TABLE "foo" ("1" INT)"#);
     redshift().verified_stmt(r#"ALTER TABLE foo ADD CONSTRAINT "bar" PRIMARY 
KEY (baz)"#);
     //TODO verified_stmt(r#"UPDATE foo SET "bar" = 5"#);
 }
@@ -203,7 +205,7 @@ fn test_redshift_json_path() {
             path: JsonPath {
                 path: vec![
                     JsonPathElem::Bracket {
-                        key: Expr::Value(Value::Number("0".parse().unwrap(), 
false))
+                        key: Expr::Value(number("0"))
                     },
                     JsonPathElem::Dot {
                         key: "o_orderkey".to_string(),
@@ -226,7 +228,7 @@ fn test_redshift_json_path() {
             path: JsonPath {
                 path: vec![
                     JsonPathElem::Bracket {
-                        key: Expr::Value(Value::Number("0".parse().unwrap(), 
false))
+                        key: Expr::Value(number("0"))
                     },
                     JsonPathElem::Bracket {
                         key: 
Expr::Value(Value::SingleQuotedString("id".to_owned()))
@@ -250,7 +252,7 @@ fn test_redshift_json_path() {
             path: JsonPath {
                 path: vec![
                     JsonPathElem::Bracket {
-                        key: Expr::Value(Value::Number("0".parse().unwrap(), 
false))
+                        key: Expr::Value(number("0"))
                     },
                     JsonPathElem::Bracket {
                         key: 
Expr::Value(Value::SingleQuotedString("id".to_owned()))
@@ -260,6 +262,31 @@ fn test_redshift_json_path() {
         },
         expr_from_projection(only(&select.projection))
     );
+
+    let sql = r#"SELECT db1.sc1.tbl1.col1[0]."id" FROM 
customer_orders_lineitem"#;
+    let select = dialects.verified_only_select(sql);
+    assert_eq!(
+        &Expr::JsonAccess {
+            value: Box::new(Expr::CompoundIdentifier(vec![
+                Ident::new("db1"),
+                Ident::new("sc1"),
+                Ident::new("tbl1"),
+                Ident::new("col1")
+            ])),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Bracket {
+                        key: Expr::Value(number("0"))
+                    },
+                    JsonPathElem::Dot {
+                        key: "id".to_string(),
+                        quoted: true,
+                    }
+                ]
+            }
+        },
+        expr_from_projection(only(&select.projection))
+    );
 }
 
 #[test]
@@ -276,7 +303,7 @@ fn test_parse_json_path_from() {
                 &Some(JsonPath {
                     path: vec![
                         JsonPathElem::Bracket {
-                            key: 
Expr::Value(Value::Number("0".parse().unwrap(), false))
+                            key: Expr::Value(number("0"))
                         },
                         JsonPathElem::Dot {
                             key: "a".to_string(),
@@ -300,7 +327,7 @@ fn test_parse_json_path_from() {
                 &Some(JsonPath {
                     path: vec![
                         JsonPathElem::Bracket {
-                            key: 
Expr::Value(Value::Number("0".parse().unwrap(), false))
+                            key: Expr::Value(number("0"))
                         },
                         JsonPathElem::Dot {
                             key: "a".to_string(),
@@ -334,3 +361,24 @@ fn test_parse_json_path_from() {
         _ => panic!(),
     }
 }
+
+#[test]
+fn test_parse_select_numbered_columns() {
+    // An alias starting with a number
+    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1" FROM a"#);
+    redshift_and_generic().verified_stmt(r#"SELECT 1 AS "1abc" FROM a"#);
+}
+
+#[test]
+fn test_parse_nested_quoted_identifier() {
+    redshift().verified_stmt(r#"SELECT 1 AS ["1"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["[="] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["=]"] FROM a"#);
+    redshift().verified_stmt(r#"SELECT 1 AS ["a[b]"] FROM a"#);
+    // trim spaces
+    redshift().one_statement_parses_to(r#"SELECT 1 AS [ " 1 " ]"#, r#"SELECT 1 
AS [" 1 "]"#);
+    // invalid query
+    assert!(redshift()
+        .parse_sql_statements(r#"SELECT 1 AS ["1]"#)
+        .is_err());
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to