spark git commit: [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals
Repository: spark Updated Branches: refs/heads/branch-2.2 b606dc177 -> 3a692e355 [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals ## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-21980 This PR fixes an issue in the ResolveGroupingAnalytics rule, which indexes the column references in grouping functions without taking the case-sensitivity configuration into account. The problem can be reproduced by: `val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b"); df.cube("a").agg(grouping("A")).show()` ## How was this patch tested? unit tests Author: donnyzone Closes #19202 from DonnyZone/ResolveGroupingAnalytics. (cherry picked from commit 21c4450fb24635fab6481a3756fefa9c6f6d6235) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a692e35 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a692e35 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a692e35 Branch: refs/heads/branch-2.2 Commit: 3a692e355a786260c4a9c2ef210fe14e409af37a Parents: b606dc1 Author: donnyzone Authored: Wed Sep 13 10:06:53 2017 -0700 Committer: gatorsmile Committed: Wed Sep 13 10:10:59 2017 -0700 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../apache/spark/sql/DataFrameAggregateSuite.scala | 16 2 files changed, 17 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3a692e35/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 50c82f5..c970c20 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -315,7 +315,7 @@ class 
Analyzer( s"grouping columns (${groupByExprs.mkString(",")})") } case e @ Grouping(col: Expression) => - val idx = groupByExprs.indexOf(col) + val idx = groupByExprs.indexWhere(_.semanticEquals(col)) if (idx >= 0) { Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), Literal(1)), ByteType), toPrettySQL(e))() http://git-wip-us.apache.org/repos/asf/spark/blob/3a692e35/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 5f65512..f50c0cf 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -186,6 +186,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { ) } + test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { +checkAnswer( + courseSales.cube("course", "year") +.agg(grouping("CouRse"), grouping("year")), + Row("Java", 2012, 0, 0) :: +Row("Java", 2013, 0, 0) :: +Row("Java", null, 0, 1) :: +Row("dotNET", 2012, 0, 0) :: +Row("dotNET", 2013, 0, 0) :: +Row("dotNET", null, 0, 1) :: +Row(null, 2012, 1, 0) :: +Row(null, 2013, 1, 0) :: +Row(null, null, 1, 1) :: Nil +) + } + test("rollup overlapping columns") { checkAnswer( testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals
Repository: spark Updated Branches: refs/heads/master b6ef1f57b -> 21c4450fb [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals ## What changes were proposed in this pull request? https://issues.apache.org/jira/browse/SPARK-21980 This PR fixes an issue in the ResolveGroupingAnalytics rule, which indexes the column references in grouping functions without taking the case-sensitivity configuration into account. The problem can be reproduced by: `val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b"); df.cube("a").agg(grouping("A")).show()` ## How was this patch tested? unit tests Author: donnyzone Closes #19202 from DonnyZone/ResolveGroupingAnalytics. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21c4450f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21c4450f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21c4450f Branch: refs/heads/master Commit: 21c4450fb24635fab6481a3756fefa9c6f6d6235 Parents: b6ef1f5 Author: donnyzone Authored: Wed Sep 13 10:06:53 2017 -0700 Committer: gatorsmile Committed: Wed Sep 13 10:06:53 2017 -0700 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../apache/spark/sql/DataFrameAggregateSuite.scala | 16 2 files changed, 17 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/21c4450f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 1e934d0..0880bd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -314,7 +314,7 @@ class Analyzer( s"grouping columns (${groupByExprs.mkString(",")})") } case e @ Grouping(col: Expression) => - 
val idx = groupByExprs.indexOf(col) + val idx = groupByExprs.indexWhere(_.semanticEquals(col)) if (idx >= 0) { Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length - 1 - idx)), Literal(1)), ByteType), toPrettySQL(e))() http://git-wip-us.apache.org/repos/asf/spark/blob/21c4450f/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index affe971..8549eac 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -190,6 +190,22 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { ) } + test("SPARK-21980: References in grouping functions should be indexed with semanticEquals") { +checkAnswer( + courseSales.cube("course", "year") +.agg(grouping("CouRse"), grouping("year")), + Row("Java", 2012, 0, 0) :: +Row("Java", 2013, 0, 0) :: +Row("Java", null, 0, 1) :: +Row("dotNET", 2012, 0, 0) :: +Row("dotNET", 2013, 0, 0) :: +Row("dotNET", null, 0, 1) :: +Row(null, 2012, 1, 0) :: +Row(null, 2013, 1, 0) :: +Row(null, null, 1, 1) :: Nil +) + } + test("rollup overlapping columns") { checkAnswer( testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - $"b") as "foo"), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org