iffyio commented on code in PR #2333:
URL: 
https://github.com/apache/datafusion-sqlparser-rs/pull/2333#discussion_r3201102320


##########
tests/sqlparser_clickhouse.rs:
##########
@@ -1845,6 +1845,70 @@ fn parse_inner_array_join() {
     }
 }
 
+#[test]
+fn parse_with_clause_named_expression() {
+    // Plain literal scalar.
+    clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t");
+
+    // String literal scalar from the ClickHouse docs.
+    clickhouse().verified_stmt(
+        "WITH '2019-08-01 15:23:00' AS ts_upper_bound SELECT * FROM hits \
+         WHERE EventDate = toDate(ts_upper_bound) AND EventTime <= 
ts_upper_bound",
+    );
+
+    // Aggregate function call as a named expression.
+    clickhouse().verified_stmt(
+        "WITH sum(bytes) AS s SELECT formatReadableSize(s), \"table\" \
+         FROM system.parts GROUP BY \"table\" ORDER BY s",
+    );
+
+    // Scalar subquery as the bound expression.
+    clickhouse().verified_stmt(
+        "WITH (SELECT sum(bytes) FROM system.parts WHERE active) AS 
total_disk_usage \
+         SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, 
\"table\" \
+         FROM system.parts GROUP BY \"table\" ORDER BY table_disk_usage DESC 
LIMIT 10",
+    );
+
+    // Bare-identifier scalar — disambiguation case (`name AS alias` looks like
+    // a CTE prefix but the missing `(` after `AS` makes it a named 
expression).
+    clickhouse().verified_stmt("WITH user_id AS uid SELECT uid FROM t");
+
+    // Mixing a named expression with a real CTE in the same WITH list.
+    clickhouse().verified_stmt("WITH 1 AS one, cte AS (SELECT 1) SELECT one 
FROM cte");
+
+    // Lambda as the bound expression (also taken from the docs).
+    clickhouse().verified_stmt(
+        "WITH '.txt' AS extension, (id, extension) -> concat(lower(id), 
extension) AS gen_name \
+         SELECT gen_name('test', '.sql') AS file_name",
+    );
+}
+
+#[test]
+fn parse_with_clause_named_expression_ast() {

Review Comment:
   we can merge this with the test above since they cover the same feature



##########
tests/sqlparser_clickhouse.rs:
##########
@@ -1845,6 +1845,70 @@ fn parse_inner_array_join() {
     }
 }
 
+#[test]
+fn parse_with_clause_named_expression() {
+    // Plain literal scalar.
+    clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t");
+
+    // String literal scalar from the ClickHouse docs.
+    clickhouse().verified_stmt(
+        "WITH '2019-08-01 15:23:00' AS ts_upper_bound SELECT * FROM hits \
+         WHERE EventDate = toDate(ts_upper_bound) AND EventTime <= 
ts_upper_bound",
+    );
+
+    // Aggregate function call as a named expression.
+    clickhouse().verified_stmt(
+        "WITH sum(bytes) AS s SELECT formatReadableSize(s), \"table\" \
+         FROM system.parts GROUP BY \"table\" ORDER BY s",
+    );
+
+    // Scalar subquery as the bound expression.
+    clickhouse().verified_stmt(
+        "WITH (SELECT sum(bytes) FROM system.parts WHERE active) AS 
total_disk_usage \
+         SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, 
\"table\" \
+         FROM system.parts GROUP BY \"table\" ORDER BY table_disk_usage DESC 
LIMIT 10",
+    );
+
+    // Bare-identifier scalar — disambiguation case (`name AS alias` looks like
+    // a CTE prefix but the missing `(` after `AS` makes it a named 
expression).
+    clickhouse().verified_stmt("WITH user_id AS uid SELECT uid FROM t");
+
+    // Mixing a named expression with a real CTE in the same WITH list.
+    clickhouse().verified_stmt("WITH 1 AS one, cte AS (SELECT 1) SELECT one 
FROM cte");
+
+    // Lambda as the bound expression (also taken from the docs).
+    clickhouse().verified_stmt(
+        "WITH '.txt' AS extension, (id, extension) -> concat(lower(id), 
extension) AS gen_name \
+         SELECT gen_name('test', '.sql') AS file_name",
+    );
+}
+
+#[test]
+fn parse_with_clause_named_expression_ast() {
+    let query = clickhouse().verified_query("WITH 42 AS answer SELECT answer 
FROM t");
+    let with = query.with.as_ref().unwrap();
+    assert!(!with.recursive);
+    assert_eq!(with.items.len(), 1);
+    match &with.items[0] {
+        WithItem::Named { expr, alias } => {
+            assert_eq!(alias.value, "answer");
+            assert!(matches!(expr, Expr::Value(_)));
+        }
+        other => panic!("expected a named expression, got {other:?}"),
+    }
+}
+
+#[test]
+fn parse_with_clause_named_expression_unsupported_in_other_dialects() {
+    // The named-expression form is only enabled for ClickHouse; other
+    // dialects should still reject `WITH 42 AS answer …`.
+    let res = sqlparser::parser::Parser::parse_sql(
+        &GenericDialect {},
+        "WITH 42 AS answer SELECT answer FROM t",
+    );
+    assert!(res.is_err(), "expected parse error, got {res:?}");
+}

Review Comment:
   I think we can skip this, no need to test what the parser does for an 
invalid query for unsupported dialects



##########
tests/sqlparser_clickhouse.rs:
##########
@@ -1845,6 +1845,70 @@ fn parse_inner_array_join() {
     }
 }
 
+#[test]
+fn parse_with_clause_named_expression() {
+    // Plain literal scalar.
+    clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t");

Review Comment:
   for the tests, lets use `all_dialects_where` instead of hardcoding it to 
clickhouse



##########
src/ast/query.rs:
##########
@@ -764,11 +765,41 @@ impl fmt::Display for With {
         if self.recursive {
             f.write_str("RECURSIVE ")?;
         }
-        display_comma_separated(&self.cte_tables).fmt(f)?;
+        display_comma_separated(&self.items).fmt(f)?;
         Ok(())
     }
 }
 
+/// A single item in a `WITH` clause.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum WithItem {

Review Comment:
   repr wise can we do something like?
   ```rust
   pub enum WithExpression {
       Cte(...),
       Cse(ExprWithAlias), // can we reuse exprwithalias?
   }



##########
src/ast/query.rs:
##########
@@ -764,11 +765,41 @@ impl fmt::Display for With {
         if self.recursive {
             f.write_str("RECURSIVE ")?;
         }
-        display_comma_separated(&self.cte_tables).fmt(f)?;
+        display_comma_separated(&self.items).fmt(f)?;
         Ok(())
     }
 }
 
+/// A single item in a `WITH` clause.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum WithItem {
+    /// A traditional common table expression: `name [(cols)] AS 
[MATERIALIZED] (query)`.
+    Cte(Cte),
+    /// `<expr> AS <alias>` — binds an expression (literal, scalar subquery,
+    /// lambda, …) to a name visible in the surrounding query.
+    ///
+    /// See ClickHouse's [common scalar expressions][1].
+    ///
+    /// [1]: 
https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions

Review Comment:
   ```suggestion
       /// A common scalar expression.
       ///
       /// [Clickhouse]: 
https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions
   ```



##########
src/ast/query.rs:
##########
@@ -754,8 +754,9 @@ pub struct With {
     pub with_token: AttachedToken,
     /// Whether the `WITH` is recursive (`WITH RECURSIVE`).
     pub recursive: bool,
-    /// The list of CTEs declared by this `WITH` clause.
-    pub cte_tables: Vec<Cte>,
+    /// The items declared by this `WITH` clause: traditional CTEs and,
+    /// for dialects that support it, named expressions.
+    pub items: Vec<WithItem>,

Review Comment:
   ```suggestion
       pub exprs: Vec<WithExpression>,
   ```



##########
src/ast/query.rs:
##########
@@ -764,11 +765,41 @@ impl fmt::Display for With {
         if self.recursive {
             f.write_str("RECURSIVE ")?;
         }
-        display_comma_separated(&self.cte_tables).fmt(f)?;
+        display_comma_separated(&self.items).fmt(f)?;
         Ok(())
     }
 }
 
+/// A single item in a `WITH` clause.
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
+pub enum WithItem {
+    /// A traditional common table expression: `name [(cols)] AS 
[MATERIALIZED] (query)`.

Review Comment:
   ```suggestion
       /// A common table expression.
   ```



##########
src/parser/mod.rs:
##########
@@ -14639,6 +14639,33 @@ impl<'a> Parser<'a> {
         Ok(cte)
     }
 
+    /// Parse a single item in a `WITH` clause.
+    ///
+    /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`).
+    /// Dialects that enable 
[`Dialect::supports_with_clause_scalar_expression`]
+    /// — currently only ClickHouse — also accept the reversed form
+    /// `<expression> AS <identifier>`, which can be freely interleaved with
+    /// CTEs in the same comma-separated list.

Review Comment:
   ```suggestion
       /// Parse a single item in a `WITH` clause.
   ```



##########
src/dialect/mod.rs:
##########
@@ -1745,6 +1745,21 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if the dialect allows a `WITH` clause item to bind a
+    /// scalar (or otherwise non-CTE) expression to a name, with the form
+    /// `<expression> AS <identifier>` — alongside or instead of the
+    /// traditional `<identifier> AS (<subquery>)` CTE form.
+    ///
+    /// For example, in ClickHouse:
+    /// ```sql
+    /// WITH 42 AS answer SELECT answer FROM t
+    /// ```
+    ///
+    /// 
[ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)
+    fn supports_with_clause_scalar_expression(&self) -> bool {

Review Comment:
   ```suggestion
       fn supports_common_scalar_expressions(&self) -> bool {
   ```



##########
src/parser/mod.rs:
##########
@@ -14639,6 +14639,33 @@ impl<'a> Parser<'a> {
         Ok(cte)
     }
 
+    /// Parse a single item in a `WITH` clause.
+    ///
+    /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`).
+    /// Dialects that enable 
[`Dialect::supports_with_clause_scalar_expression`]
+    /// — currently only ClickHouse — also accept the reversed form
+    /// `<expression> AS <identifier>`, which can be freely interleaved with
+    /// CTEs in the same comma-separated list.
+    pub fn parse_with_item(&mut self) -> Result<WithItem, ParserError> {
+        if !self.dialect.supports_with_clause_scalar_expression() {
+            return self.parse_cte().map(WithItem::Cte);
+        }
+
+        // CTE form must start with an identifier. If the leading token
+        // can't begin one (e.g. `42`, `(SELECT …)`, `(x, y) -> …`), this
+        // is unambiguously the named-expression form.
+        if matches!(self.peek_token().token, Token::Word(_)) {
+            if let Some(cte) = self.maybe_parse(|p| p.parse_cte())? {
+                return Ok(WithItem::Cte(cte));
+            }
+        }

Review Comment:
   Can this be simplified to use `peek_subquery_or_cte_start`? e.g.
   
   that we do `if self.dialect.supports() and !self.peek_subquery_or_cte_start 
{ parse expr } else { parse cte }`



##########
src/dialect/mod.rs:
##########
@@ -1745,6 +1745,21 @@ pub trait Dialect: Debug + Any {
         false
     }
 
+    /// Returns true if the dialect allows a `WITH` clause item to bind a
+    /// scalar (or otherwise non-CTE) expression to a name, with the form
+    /// `<expression> AS <identifier>` — alongside or instead of the
+    /// traditional `<identifier> AS (<subquery>)` CTE form.
+    ///
+    /// For example, in ClickHouse:
+    /// ```sql
+    /// WITH 42 AS answer SELECT answer FROM t
+    /// ```
+    ///
+    /// 
[ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)

Review Comment:
   ```suggestion
       /// Returns true if the dialect supports Common Scalar Expressions in a 
`WITH` clause.
       ///
       /// For example:
       /// ```sql
       /// WITH 42 AS answer SELECT answer FROM t
       /// ```
       ///
       /// 
[ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)
   ```



##########
src/ast/query.rs:
##########
@@ -754,8 +754,9 @@ pub struct With {
     pub with_token: AttachedToken,
     /// Whether the `WITH` is recursive (`WITH RECURSIVE`).
     pub recursive: bool,
-    /// The list of CTEs declared by this `WITH` clause.
-    pub cte_tables: Vec<Cte>,
+    /// The items declared by this `WITH` clause: traditional CTEs and,
+    /// for dialects that support it, named expressions.

Review Comment:
   ```suggestion
       /// The expressions declared by this `WITH` clause.
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to