Github user aray commented on a diff in the pull request: https://github.com/apache/spark/pull/9429#discussion_r43810369 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala --- @@ -205,45 +205,30 @@ class Analyzer( GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations) case x: GroupingSets => val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)() - // We will insert another Projection if the GROUP BY keys contains the - // non-attribute expressions. And the top operators can references those - // expressions by its alias. - // e.g. SELECT key%5 as c1 FROM src GROUP BY key%5 ==> - // SELECT a as c1 FROM (SELECT key%5 AS a FROM src) GROUP BY a - - // find all of the non-attribute expressions in the GROUP BY keys - val nonAttributeGroupByExpressions = new ArrayBuffer[Alias]() - - // The pair of (the original GROUP BY key, associated attribute) - val groupByExprPairs = x.groupByExprs.map(_ match { - case e: NamedExpression => (e, e.toAttribute) - case other => { - val alias = Alias(other, other.toString)() - nonAttributeGroupByExpressions += alias // add the non-attributes expression alias - (other, alias.toAttribute) - } - }) - // substitute the non-attribute expressions for aggregations. - val aggregation = x.aggregations.map(expr => expr.transformDown { - case e => groupByExprPairs.find(_._1.semanticEquals(e)).map(_._2).getOrElse(e) - }.asInstanceOf[NamedExpression]) + val aliasedGroupByExprPairs = x.groupByExprs.map{ + case a @ Alias(expr, _) => (expr, a) + case expr: NamedExpression => (expr, Alias(expr, expr.name)()) + case expr => (expr, Alias(expr, expr.prettyString)()) + } - // substitute the group by expressions. 
- val newGroupByExprs = groupByExprPairs.map(_._2) + val aliasedGroupByExprs = aliasedGroupByExprPairs.map(_._2) + val aliasedGroupByAttr = aliasedGroupByExprs.map(_.toAttribute) - val child = if (nonAttributeGroupByExpressions.length > 0) { - // insert additional projection if contains the - // non-attribute expressions in the GROUP BY keys - Project(x.child.output ++ nonAttributeGroupByExpressions, x.child) - } else { - x.child + // substitute group by expressions in aggregation list with appropriate attribute + val aggregations = x.aggregations.map{ --- End diff -- @chenghao-intel Actually, that change would bring back the bug in question: it would perform the substitutions in situations like the one below, so the aggregations would be computed from the manipulated (null-inserted) values. ``` select a + b, c, sum(a+b) + count(c) from t1 group by a + b, c with rollup ``` In general, we don't want to transform anything below an AggregateExpression, but we do want to transform everything above it. So what I really need is a transformDownUntil method. BTW, making this change does fix the `groupby_grouping_sets1` test, so I really do need to do something.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes to enable it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org