[ 
https://issues.apache.org/jira/browse/CALCITE-3874?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Justin Swett updated CALCITE-3874:
----------------------------------
    Description: 
When a sub-query is detected, via RelToSqlConverter visit on Project and in 
turn SqlImplementor's builder method, the new aliases used for the new context 
are built from the project's input RowType. This can lead to incorrect 
generated SQL. Consider the following rel:   
{code:java}
-- Including the rowType below each rel  
  LogicalProject(users.id=[$0], orders.count=[$1])
      "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
      "orders.count" -> {BasicSqlType@} "BIGINT"
    LogicalFilter(condition=[AND(templatedOpTo_BOOLEAN_2a703bc54fc6($2), 
lookerFirst(asIs(_UTF-8'(COUNT(*)) = 11 OR (COUNT(*)) = 1'), $0, $1, $2))])
        "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
        "orders.count" -> {BasicSqlType@} "BIGINT"
        "orders.count_0" -> {BasicSqlType@} "BIGINT"
      LogicalAggregate(group=[{5}], orders.count=[COUNT()], 
orders.count=[COUNT()])         
            "id0" -> {BasicSqlType@} "DECIMAL(19, 0)" 
            "orders.count" -> {BasicSqlType@} "BIGINT"
            "orders.count_0" -> {BasicSqlType@} "BIGINT"
        LogicalJoin(condition=[=($2, $5)], joinType=[left])
                "id" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "status" -> {BasicSqlType@} "VARCHAR"
                "user_id" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "order_amount" -> {BasicSqlType@} "DOUBLE"
                "created_at" -> {BasicSqlType@21558} "TIMESTAMP(0)"
                "id0" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "name" -> {BasicSqlType@} "VARCHAR"
                "age" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "created_at0" -> {BasicSqlType@21558} "TIMESTAMP(0)"
          ExplicitlyAliasedTableScan(table=[[db, orders]])
          ExplicitlyAliasedTableScan(table=[[db, users]])
{code}
 

The generated SQL from this rel is:
{code:java}
SELECT
    `t0`.`users.id`,
    `t0`.`orders.count`
FROM (SELECT
            `users`.`id` AS `id0`,  -- this is coming from Agg 
            COUNT(*) AS `orders.count`,
            COUNT(*) AS `orders.count_0`
        FROM `orders` AS `orders` LEFT JOIN `users` AS `users` ON 
`orders`.`user_id` = `users`.`id`
        GROUP BY
            `users`.`id`
        HAVING ((COUNT(*)  = 11)) AND ((COUNT(*)) = 11 OR (COUNT(*)) = 1)) AS 
`t0`
ORDER BY
    `t0`.`users.id`
{code}
Expected SQL:
{code:java}
SELECT
    `t1`.`users.id`,
    `t1`.`orders.count`
FROM (SELECT
            `users`.`id` AS `users.id`,  <-- aliased correctly
            COUNT(*) AS `orders.count`,
            COUNT(*) AS `orders.count_0`
        FROM `orders` AS `orders` LEFT JOIN `users` AS `users` ON 
`orders`.`user_id` = `users`.`id`
        GROUP BY
            1
        HAVING ((COUNT(*)  = 11)) AND ((COUNT(*)) = 11 OR (COUNT(*)) = 1)) AS 
`t1`
ORDER BY
    `t1`.`users.id`
{code}

I've traced this to [SqlImplementor 
|https://github.com/apache/calcite/blob/3c9e156aea4a246318e1fa9ea299adfc9479e20e/core/src/main/java/org/apache/calcite/rel/rel2sql/SqlImplementor.java#L1380]


  was:
When a sub-query is detected, via RelToSqlConverter visit on Project and in 
turn SqlImplementor's builder method, the new aliases used for the new context 
are built from the project's input RowType. This can lead to incorrect 
generated SQL. Consider the following rel:   
{code:java}
-- Including the rowType below each rel  
  LogicalProject(users.id=[$0], orders.count=[$1])
      "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
      "orders.count" -> {BasicSqlType@} "BIGINT"
    LogicalFilter(condition=[AND(templatedOpTo_BOOLEAN_2a703bc54fc6($2), 
lookerFirst(asIs(_UTF-8'(COUNT(*)) = 11 OR (COUNT(*)) = 1'), $0, $1, $2))])
        "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
        "orders.count" -> {BasicSqlType@} "BIGINT"
        "orders.count_0" -> {BasicSqlType@} "BIGINT"
      LogicalAggregate(group=[{5}], orders.count=[COUNT()], 
orders.count=[COUNT()])         
            "id0" -> {BasicSqlType@} "DECIMAL(19, 0)" 
            "orders.count" -> {BasicSqlType@} "BIGINT"
            "orders.count_0" -> {BasicSqlType@} "BIGINT"
        LogicalJoin(condition=[=($2, $5)], joinType=[left])
                "id" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "status" -> {BasicSqlType@} "VARCHAR"
                "user_id" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "order_amount" -> {BasicSqlType@} "DOUBLE"
                "created_at" -> {BasicSqlType@21558} "TIMESTAMP(0)"
                "id0" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "name" -> {BasicSqlType@} "VARCHAR"
                "age" -> {BasicSqlType@} "DECIMAL(19, 0)"
                "created_at0" -> {BasicSqlType@21558} "TIMESTAMP(0)"
          ExplicitlyAliasedTableScan(table=[[db, orders]])
          ExplicitlyAliasedTableScan(table=[[db, users]])
{code}
 

The generated SQL from this rel is:
{code:java}
SELECT
    `t0`.`users.id`,
    `t0`.`orders.count`
FROM (SELECT
            `users`.`id` AS `id0`,  -- this is coming from Agg 
            COUNT(*) AS `orders.count`,
            COUNT(*) AS `orders.count_0`
        FROM `orders` AS `orders` LEFT JOIN `users` AS `users` ON 
`orders`.`user_id` = `users`.`id`
        GROUP BY
            `users`.`id`
        HAVING ((COUNT(*)  = 11)) AND ((COUNT(*)) = 11 OR (COUNT(*)) = 1)) AS 
`t0`
ORDER BY
    `t0`.`users.id`
{code}
Expected SQL:
{code:java}
SELECT
    `t1`.`users.id`,
    `t1`.`orders.count`
FROM (SELECT
            `users`.`id` AS `users.id`,  <-- aliased correctly
            COUNT(*) AS `orders.count`,
            COUNT(*) AS `orders.count_0`
        FROM `orders` AS `orders` LEFT JOIN `users` AS `users` ON 
`orders`.`user_id` = `users`.`id`
        GROUP BY
            1
        HAVING ((COUNT(*)  = 11)) AND ((COUNT(*)) = 11 OR (COUNT(*)) = 1)) AS 
`t1`
ORDER BY
    `t1`.`users.id`
LIMIT 10
{code}


> SqlImplementor builder uses wrong context for sub-selects
> ---------------------------------------------------------
>
>                 Key: CALCITE-3874
>                 URL: https://issues.apache.org/jira/browse/CALCITE-3874
>             Project: Calcite
>          Issue Type: Bug
>    Affects Versions: 1.22.0
>         Environment: Running against MySQL.
> This situation arrises specifically from running *AggregateProjectMergeRule* 
> on the following rel:
> {code:java}
> LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10])
>   LogicalProject(users.id=[$0], orders.count=[$1])
>     LogicalFilter(condition=[AND(templatedOpTo_BOOLEAN_2a703bc54fc6($2), 
> lookerFirst(asIs(_UTF-8'(COUNT(*)) = 11 OR (COUNT(*)) = 1'), $0, $1, $2))])
>       LogicalAggregate(group=[{0}], orders.count=[COUNT()], 
> orders.count=[COUNT()])
>            "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
>            "orders.count" -> {BasicSqlType@} "BIGINT"
>            "orders.count_0" -> {BasicSqlType@} "BIGINT"
>         LogicalProject(users.id=[$5])
>           LogicalJoin(condition=[=($2, $5)], joinType=[left])
>             ExplicitlyAliasedTableScan(table=[[looker, orders]])
>             ExplicitlyAliasedTableScan(table=[[looker, users]])
> {code}
> producing the rel
> {code:java}
> LogicalSort(sort0=[$0], dir0=[ASC-nulls-first], fetch=[10])
>   LogicalProject(users.id=[$0], orders.count=[$1])
>     LogicalFilter(condition=[AND(templatedOpTo_BOOLEAN_2a703bc54fc6($2), 
> lookerFirst(asIs(_UTF-8'(COUNT(*)) = 11 OR (COUNT(*)) = 1'), $0, $1, $2))])
>       LogicalAggregate(group=[{5}], orders.count=[COUNT()], 
> orders.count=[COUNT()])
>            "id0" -> {BasicSqlType@ } "DECIMAL(19, 0)"
>            "orders.count" -> {BasicSqlType@ } "BIGINT"
>            "orders.count_0" -> {BasicSqlType@ } "BIGINT"
>         LogicalJoin(condition=[=($2, $5)], joinType=[left])
>           ExplicitlyAliasedTableScan(table=[[looker, orders]])
>           ExplicitlyAliasedTableScan(table=[[looker, users]])
> {code}
>            Reporter: Justin Swett
>            Priority: Minor
>
> When a sub-query is detected, via RelToSqlConverter visit on Project and in 
> turn SqlImplementor's builder method, the new aliases used for the new 
> context are built from the project's input RowType. This can lead to 
> incorrect generated SQL. Consider the following rel:   
> {code:java}
> -- Including the rowType below each rel  
>   LogicalProject(users.id=[$0], orders.count=[$1])
>       "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
>       "orders.count" -> {BasicSqlType@} "BIGINT"
>     LogicalFilter(condition=[AND(templatedOpTo_BOOLEAN_2a703bc54fc6($2), 
> lookerFirst(asIs(_UTF-8'(COUNT(*)) = 11 OR (COUNT(*)) = 1'), $0, $1, $2))])
>         "users.id" -> {BasicSqlType@} "DECIMAL(19, 0)"
>         "orders.count" -> {BasicSqlType@} "BIGINT"
>         "orders.count_0" -> {BasicSqlType@} "BIGINT"
>       LogicalAggregate(group=[{5}], orders.count=[COUNT()], 
> orders.count=[COUNT()])         
>             "id0" -> {BasicSqlType@} "DECIMAL(19, 0)" 
>             "orders.count" -> {BasicSqlType@} "BIGINT"
>             "orders.count_0" -> {BasicSqlType@} "BIGINT"
>         LogicalJoin(condition=[=($2, $5)], joinType=[left])
>                 "id" -> {BasicSqlType@} "DECIMAL(19, 0)"
>                 "status" -> {BasicSqlType@} "VARCHAR"
>                 "user_id" -> {BasicSqlType@} "DECIMAL(19, 0)"
>                 "order_amount" -> {BasicSqlType@} "DOUBLE"
>                 "created_at" -> {BasicSqlType@21558} "TIMESTAMP(0)"
>                 "id0" -> {BasicSqlType@} "DECIMAL(19, 0)"
>                 "name" -> {BasicSqlType@} "VARCHAR"
>                 "age" -> {BasicSqlType@} "DECIMAL(19, 0)"
>                 "created_at0" -> {BasicSqlType@21558} "TIMESTAMP(0)"
>           ExplicitlyAliasedTableScan(table=[[db, orders]])
>           ExplicitlyAliasedTableScan(table=[[db, users]])
> {code}
>  
> The generated SQL from this rel is:
> {code:java}
> SELECT
>     `t0`.`users.id`,
>     `t0`.`orders.count`
> FROM (SELECT
>             `users`.`id` AS `id0`,  -- this is coming from Agg 
>             COUNT(*) AS `orders.count`,
>             COUNT(*) AS `orders.count_0`
>         FROM `orders` AS `orders` LEFT JOIN `users` AS `users` ON 
> `orders`.`user_id` = `users`.`id`
>         GROUP BY
>             `users`.`id`
>         HAVING ((COUNT(*)  = 11)) AND ((COUNT(*)) = 11 OR (COUNT(*)) = 1)) AS 
> `t0`
> ORDER BY
>     `t0`.`users.id`
> {code}
> Expected SQL:
> {code:java}
> SELECT
>     `t1`.`users.id`,
>     `t1`.`orders.count`
> FROM (SELECT
>             `users`.`id` AS `users.id`,  <-- aliased correctly
>             COUNT(*) AS `orders.count`,
>             COUNT(*) AS `orders.count_0`
>         FROM `orders` AS `orders` LEFT JOIN `users` AS `users` ON 
> `orders`.`user_id` = `users`.`id`
>         GROUP BY
>             1
>         HAVING ((COUNT(*)  = 11)) AND ((COUNT(*)) = 11 OR (COUNT(*)) = 1)) AS 
> `t1`
> ORDER BY
>     `t1`.`users.id`
> {code}
> I've traced this to [SqlImplementor 
> |https://github.com/apache/calcite/blob/3c9e156aea4a246318e1fa9ea299adfc9479e20e/core/src/main/java/org/apache/calcite/rel/rel2sql/SqlImplementor.java#L1380]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to