iffyio commented on code in PR #2272:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/2272#discussion_r2904281820


##########
src/parser/mod.rs:
##########
@@ -4223,13 +4233,26 @@ impl<'a> Parser<'a> {
         loop {
             match self.next_token().token {
                 Token::Colon if path.is_empty() => {
-                    path.push(self.parse_json_path_object_key()?);
+                    if self.peek_token_ref().token == Token::LBracket {
+                        // A bracket element directly after the colon, e.g. 
`raw:['field']`.
+                        // Push an empty Dot so the display re-emits the 
leading `:` for syntax roundtrip.
+                        path.push(JsonPathElem::Dot {
+                            key: String::new(),
+                            quoted: false,
+                        });
+                        self.next_token();
+                        let key = self.parse_json_path_bracket_key()?;
+                        self.expect_token(&Token::RBracket)?;
+                        path.push(JsonPathElem::Bracket { key });
+                    } else {
+                        path.push(self.parse_json_path_object_key()?);
+                    }
                 }
                 Token::Period if !path.is_empty() => {
                     path.push(self.parse_json_path_object_key()?);
                 }
                 Token::LBracket => {
-                    let key = self.parse_expr()?;
+                    let key = self.parse_json_path_bracket_key()?;

Review Comment:
   ```suggestion
                       let key = self.parse_wildcard_expr()?;
   ```
   would this work as well?



##########
src/parser/mod.rs:
##########
@@ -4223,13 +4233,26 @@ impl<'a> Parser<'a> {
         loop {
             match self.next_token().token {
                 Token::Colon if path.is_empty() => {
-                    path.push(self.parse_json_path_object_key()?);
+                    if self.peek_token_ref().token == Token::LBracket {
+                        // A bracket element directly after the colon, e.g. 
`raw:['field']`.
+                        // Push an empty Dot so the display re-emits the 
leading `:` for syntax roundtrip.
+                        path.push(JsonPathElem::Dot {
+                            key: String::new(),
+                            quoted: false,
+                        });
+                        self.next_token();
+                        let key = self.parse_json_path_bracket_key()?;
+                        self.expect_token(&Token::RBracket)?;
+                        path.push(JsonPathElem::Bracket { key });

Review Comment:
   I think we can instead introduce a `JsonPathElem::ColonBracket` variant?
   
   And in this match statement we can give it its own arm, with something like
   ```rust
   Token::Colon if path.is_empty() && self.peek_token_ref().token == Token::LBracket => { ... }
   ```



##########
tests/sqlparser_databricks.rs:
##########
@@ -600,3 +600,150 @@ fn parse_databricks_struct_type() {
         _ => unreachable!(),
     }
 }
+
+// https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html

Review Comment:
   ```suggestion
   ```
   We can drop this link from the test and instead add the docs link to the newly introduced enum variant.



##########
tests/sqlparser_databricks.rs:
##########
@@ -600,3 +600,150 @@ fn parse_databricks_struct_type() {
         _ => unreachable!(),
     }
 }
+
+// https://docs.databricks.com/en/sql/language-manual/functions/colonsign.html
+#[test]
+fn parse_databricks_json_accessor() {
+    // Basic colon accessor — unquoted field names are case-insensitive
+    databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM 
store_data");
+
+    // Unquoted field access is case-insensitive; bracket notation is 
case-sensitive.
+    databricks().verified_only_select(
+        "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive 
FROM store_data",
+    );
+
+    // Backtick-quoted keys (Databricks delimited identifiers) normalise to 
double-quoted output.
+    databricks().one_statement_parses_to(
+        "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM 
store_data",
+        r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM 
store_data"#,
+    );
+
+    // Dot notation
+    databricks().verified_only_select("SELECT raw:store.bicycle FROM 
store_data");
+
+    // String-key bracket notation after a dot segment
+    databricks()
+        .verified_only_select("SELECT raw:store['bicycle'], 
raw:store['BICYCLE'] FROM store_data");
+
+    // Integer-index bracket notation
+    databricks()
+        .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] 
FROM store_data");
+
+    // Wildcard [*] — including chained and mixed positions
+    databricks().verified_only_select(
+        "SELECT raw:store.basket[*], raw:store.basket[*][0] AS 
first_of_baskets, \
+         raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS 
all_elements_flattened, \
+         raw:store.basket[0][2].b AS subfield FROM store_data",
+    );
+
+    // Dot access following a wildcard bracket
+    databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM 
store_data");
+
+    // Double-colon cast — type keyword normalises to upper case
+    databricks().one_statement_parses_to(
+        "SELECT raw:store.bicycle.price::double FROM store_data",
+        "SELECT raw:store.bicycle.price::DOUBLE FROM store_data",
+    );
+
+    // --- AST structure assertions ---
+
+    // Simple dot access
+    assert_eq!(
+        databricks().verified_expr("raw:owner"),
+        Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("raw"))),
+            path: JsonPath {
+                path: vec![JsonPathElem::Dot {
+                    key: "owner".to_owned(),
+                    quoted: false,
+                }],
+            },
+        }
+    );
+
+    // Multi-level dot access
+    assert_eq!(
+        databricks().verified_expr("raw:store.bicycle"),
+        Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("raw"))),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Dot {
+                        key: "store".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Dot {
+                        key: "bicycle".to_owned(),
+                        quoted: false,
+                    },
+                ],
+            },
+        }
+    );
+
+    // Dot path followed by an integer-index bracket
+    assert_eq!(
+        databricks().verified_expr("raw:store.fruit[0]"),
+        Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("raw"))),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Dot {
+                        key: "store".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Dot {
+                        key: "fruit".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Bracket {
+                        key: Expr::value(number("0")),
+                    },
+                ],
+            },
+        }
+    );
+
+    // [*] is stored as Expr::Wildcard inside a Bracket element
+    assert_eq!(
+        databricks().verified_expr("raw:store.basket[*]"),
+        Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("raw"))),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Dot {
+                        key: "store".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Dot {
+                        key: "basket".to_owned(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Bracket {
+                        key: Expr::Wildcard(AttachedToken::empty()),
+                    },
+                ],
+            },
+        }
+    );
+
+    // raw:['OWNER'] — bracket directly after the colon. An empty-key sentinel 
Dot is prepended
+    // so that the display re-emits the leading `:`, enabling a correct 
round-trip.
+    assert_eq!(
+        databricks().verified_expr("raw:['OWNER']"),
+        Expr::JsonAccess {
+            value: Box::new(Expr::Identifier(Ident::new("raw"))),
+            path: JsonPath {
+                path: vec![
+                    JsonPathElem::Dot {
+                        key: String::new(),
+                        quoted: false,
+                    },
+                    JsonPathElem::Bracket {
+                        key: 
Expr::value(Value::SingleQuotedString("OWNER".to_owned())),
+                    },
+                ],
+            },
+        }
+    );

Review Comment:
   I think we can skip these assertions on the AST and rely on the ones above 
that use the `verified*` helper functions



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to