AngersZhuuuu commented on a change in pull request #30144: URL: https://github.com/apache/spark/pull/30144#discussion_r604725030
########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala ########## @@ -212,3 +212,29 @@ object GroupingID { if (SQLConf.get.integerGroupingIdEnabled) IntegerType else LongType } } + + +object GroupingAnalytics { + def unapply(exprs: Seq[Expression]): + Option[(Seq[Seq[Expression]], Seq[Seq[Expression]], Seq[Expression])] = { + + val (groupingSets, others) = exprs.partition(_.isInstanceOf[GroupingSet]) + if (groupingSets.isEmpty) { + None + } else { + val groups = + groupingSets.flatMap(_.asInstanceOf[GroupingSet].groupByExprs) ++ others + val selectedGroupByExprs = + groupingSets.map(_.asInstanceOf[GroupingSet].selectedGroupByExprs) + .foldRight(Seq.empty[Seq[Expression]]) { (x, y) => + if (y.isEmpty) { Review comment: > We need this check? We cannot write it like this? > > ``` > .foldRight(Seq.empty[Seq[Expression]]) { (x, y) => > for (a <- x; b <- y) yield b ++ a > }.map(others ++ _).map(_.distinct) > ``` We can't, since with `foldRight(Seq.empty[Seq[Expression]])` this empty Seq should be handled. Is there any other Scala collection that can avoid this problem? 
########## File path: sql/core/src/test/resources/sql-tests/inputs/group-analytics.sql ########## @@ -69,4 +69,17 @@ SELECT course, year FROM courseSales GROUP BY CUBE(course, year) ORDER BY groupi -- Aliases in SELECT could be used in ROLLUP/CUBE/GROUPING SETS SELECT a + b AS k1, b AS k2, SUM(a - b) FROM testData GROUP BY CUBE(k1, k2); SELECT a + b AS k, b, SUM(a - b) FROM testData GROUP BY ROLLUP(k, b); -SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k) +SELECT a + b, b AS k, SUM(a - b) FROM testData GROUP BY a + b, k GROUPING SETS(k); + +-- GROUP BY use mixed Separate columns and CUBE/ROLLUP/Gr +SELECT a, b, count(1) FROM testData GROUP BY a, b, CUBE(a, b); +SELECT a, b, count(1) FROM testData GROUP BY a, b, ROLLUP(a, b); +SELECT a, b, count(1) FROM testData GROUP BY CUBE(a, b), ROLLUP(a, b); +SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(b); +SELECT a, b, count(1) FROM testData GROUP BY a, GROUPING SETS((a, b), (a), ()); +SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), GROUPING SETS((a, b), (a), ()); +SELECT a, b, count(1) FROM testData GROUP BY a, CUBE(a, b), ROLLUP(a, b), GROUPING SETS((a, b), (a), ()); + Review comment: > nit: we don't need the two blank lines. Done ########## File path: sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala ########## @@ -3714,6 +3714,32 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } + test("SPARK-33229: Support GROUP BY use Separate columns and CUBE/ROLLUP") { Review comment: > We still need this test? 
Removed ########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala ########## @@ -212,3 +212,29 @@ object GroupingID { if (SQLConf.get.integerGroupingIdEnabled) IntegerType else LongType } } + + +object GroupingAnalytics { + def unapply(exprs: Seq[Expression]): + Option[(Seq[Seq[Expression]], Seq[Seq[Expression]], Seq[Expression])] = { + + val (groupingSets, others) = exprs.partition(_.isInstanceOf[GroupingSet]) + if (groupingSets.isEmpty) { + None + } else { + val groups = + groupingSets.flatMap(_.asInstanceOf[GroupingSet].groupByExprs) ++ others Review comment: > Since the cast `.asInstanceOf[GroupingSet]` appears three times, could you cast it only once at the beginning? Done ########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala ########## @@ -212,3 +212,29 @@ object GroupingID { if (SQLConf.get.integerGroupingIdEnabled) IntegerType else LongType } } + + +object GroupingAnalytics { + def unapply(exprs: Seq[Expression]): + Option[(Seq[Seq[Expression]], Seq[Seq[Expression]], Seq[Expression])] = { + Review comment: > nit: remove the unnecessary blank. Done ########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/grouping.scala ########## @@ -212,3 +212,29 @@ object GroupingID { if (SQLConf.get.integerGroupingIdEnabled) IntegerType else LongType } } + + +object GroupingAnalytics { + def unapply(exprs: Seq[Expression]): + Option[(Seq[Seq[Expression]], Seq[Seq[Expression]], Seq[Expression])] = { Review comment: > ``` > def unapply(exprs: Seq[Expression]) > : Option[(Seq[Seq[Expression]], Seq[Seq[Expression]], Seq[Expression])] = { > ``` Done -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org