[ 
https://issues.apache.org/jira/browse/SPARK-31670?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

angerszhu updated SPARK-31670:
------------------------------
    Description: 
Will wrong with below SQL
{code:java}
test("TEST STRUCT FIELD WITH GROUP BY with CUBE") {
  withTable("t1") {
    sql(
      """create table t1(
        |a string,
        |b int,
        |c array<struct<row_id:int,json_string:string>>)
        |using orc""".stripMargin)

    sql(
      """
        |select a, coalesce(get_json_object(each.json_string,'$.iType'),'-127') 
as iType, sum(b)
        |from t1
        |LATERAL VIEW explode(c) x AS each
        |group by a, get_json_object(each.json_string,'$.iType')
        |with cube
        |""".stripMargin).explain(true)
  }
}
{code}
ErrorĀ 
{code:java}
expression 'x.`each`' is neither present in the group by, nor is it an 
aggregate function. Add to group by or wrap in first() (or first_value) if you 
don't care which value you get.;;
Aggregate [a#230, get_json_object(each#222.json_string AS json_string#223, 
$.iType)#231, spark_grouping_id#229L], [a#230, 
coalesce(get_json_object(each#222.json_string, $.iType), -127) AS iType#218, 
sum(cast(b#220 as bigint)) AS sum(b)#226L]
+- Expand [List(a#219, b#220, c#221, each#222, a#227, 
get_json_object(each#222.json_string AS json_string#223, $.iType)#228, 0), 
List(a#219, b#220, c#221, each#222, a#227, null, 1), List(a#219, b#220, c#221, 
each#222, null, get_json_object(each#222.json_string AS json_string#223, 
$.iType)#228, 2), List(a#219, b#220, c#221, each#222, null, null, 3)], [a#219, 
b#220, c#221, each#222, a#230, get_json_object(each#222.json_string AS 
json_string#223, $.iType)#231, spark_grouping_id#229L]
   +- Project [a#219, b#220, c#221, each#222, a#219 AS a#227, 
get_json_object(each#222.json_string, $.iType) AS 
get_json_object(each#222.json_string AS json_string#223, $.iType)#228]
      +- Generate explode(c#221), false, x, [each#222]
         +- SubqueryAlias spark_catalog.default.t1
            +- Relation[a#219,b#220,c#221] orc
{code}

> Using complex type in Aggregation with cube failed Analysis error
> -----------------------------------------------------------------
>
>                 Key: SPARK-31670
>                 URL: https://issues.apache.org/jira/browse/SPARK-31670
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.0, 3.0.0
>            Reporter: angerszhu
>            Priority: Major
>
> Will wrong with below SQL
> {code:java}
> test("TEST STRUCT FIELD WITH GROUP BY with CUBE") {
>   withTable("t1") {
>     sql(
>       """create table t1(
>         |a string,
>         |b int,
>         |c array<struct<row_id:int,json_string:string>>)
>         |using orc""".stripMargin)
>     sql(
>       """
>         |select a, 
> coalesce(get_json_object(each.json_string,'$.iType'),'-127') as iType, sum(b)
>         |from t1
>         |LATERAL VIEW explode(c) x AS each
>         |group by a, get_json_object(each.json_string,'$.iType')
>         |with cube
>         |""".stripMargin).explain(true)
>   }
> }
> {code}
> ErrorĀ 
> {code:java}
> expression 'x.`each`' is neither present in the group by, nor is it an 
> aggregate function. Add to group by or wrap in first() (or first_value) if 
> you don't care which value you get.;;
> Aggregate [a#230, get_json_object(each#222.json_string AS json_string#223, 
> $.iType)#231, spark_grouping_id#229L], [a#230, 
> coalesce(get_json_object(each#222.json_string, $.iType), -127) AS iType#218, 
> sum(cast(b#220 as bigint)) AS sum(b)#226L]
> +- Expand [List(a#219, b#220, c#221, each#222, a#227, 
> get_json_object(each#222.json_string AS json_string#223, $.iType)#228, 0), 
> List(a#219, b#220, c#221, each#222, a#227, null, 1), List(a#219, b#220, 
> c#221, each#222, null, get_json_object(each#222.json_string AS 
> json_string#223, $.iType)#228, 2), List(a#219, b#220, c#221, each#222, null, 
> null, 3)], [a#219, b#220, c#221, each#222, a#230, 
> get_json_object(each#222.json_string AS json_string#223, $.iType)#231, 
> spark_grouping_id#229L]
>    +- Project [a#219, b#220, c#221, each#222, a#219 AS a#227, 
> get_json_object(each#222.json_string, $.iType) AS 
> get_json_object(each#222.json_string AS json_string#223, $.iType)#228]
>       +- Generate explode(c#221), false, x, [each#222]
>          +- SubqueryAlias spark_catalog.default.t1
>             +- Relation[a#219,b#220,c#221] orc
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to