spark git commit: [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals

2017-09-13 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 b606dc177 -> 3a692e355


[SPARK-21980][SQL] References in grouping functions should be indexed with 
semanticEquals

## What changes were proposed in this pull request?

https://issues.apache.org/jira/browse/SPARK-21980

This PR fixes an issue in the ResolveGroupingAnalytics rule, which indexes the 
column references in grouping functions without taking the case-sensitivity 
configuration into account.

The problem can be reproduced by:

`val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b")
 df.cube("a").agg(grouping("A")).show()`

## How was this patch tested?
Unit tests: a new SPARK-21980 test case was added to DataFrameAggregateSuite.

Author: donnyzone 

Closes #19202 from DonnyZone/ResolveGroupingAnalytics.

(cherry picked from commit 21c4450fb24635fab6481a3756fefa9c6f6d6235)
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a692e35
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a692e35
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a692e35

Branch: refs/heads/branch-2.2
Commit: 3a692e355a786260c4a9c2ef210fe14e409af37a
Parents: b606dc1
Author: donnyzone 
Authored: Wed Sep 13 10:06:53 2017 -0700
Committer: gatorsmile 
Committed: Wed Sep 13 10:10:59 2017 -0700

--
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  2 +-
 .../apache/spark/sql/DataFrameAggregateSuite.scala  | 16 
 2 files changed, 17 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3a692e35/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 50c82f5..c970c20 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -315,7 +315,7 @@ class Analyzer(
 s"grouping columns (${groupByExprs.mkString(",")})")
   }
 case e @ Grouping(col: Expression) =>
-  val idx = groupByExprs.indexOf(col)
+  val idx = groupByExprs.indexWhere(_.semanticEquals(col))
   if (idx >= 0) {
 Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length 
- 1 - idx)),
   Literal(1)), ByteType), toPrettySQL(e))()

http://git-wip-us.apache.org/repos/asf/spark/blob/3a692e35/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index 5f65512..f50c0cf 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -186,6 +186,22 @@ class DataFrameAggregateSuite extends QueryTest with 
SharedSQLContext {
 )
   }
 
+  test("SPARK-21980: References in grouping functions should be indexed with 
semanticEquals") {
+checkAnswer(
+  courseSales.cube("course", "year")
+.agg(grouping("CouRse"), grouping("year")),
+  Row("Java", 2012, 0, 0) ::
+Row("Java", 2013, 0, 0) ::
+Row("Java", null, 0, 1) ::
+Row("dotNET", 2012, 0, 0) ::
+Row("dotNET", 2013, 0, 0) ::
+Row("dotNET", null, 0, 1) ::
+Row(null, 2012, 1, 0) ::
+Row(null, 2013, 1, 0) ::
+Row(null, null, 1, 1) :: Nil
+)
+  }
+
   test("rollup overlapping columns") {
 checkAnswer(
   testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - 
$"b") as "foo"),


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-21980][SQL] References in grouping functions should be indexed with semanticEquals

2017-09-13 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master b6ef1f57b -> 21c4450fb


[SPARK-21980][SQL] References in grouping functions should be indexed with 
semanticEquals

## What changes were proposed in this pull request?

https://issues.apache.org/jira/browse/SPARK-21980

This PR fixes an issue in the ResolveGroupingAnalytics rule, which indexes the 
column references in grouping functions without taking the case-sensitivity 
configuration into account.

The problem can be reproduced by:

`val df = spark.createDataFrame(Seq((1, 1), (2, 1), (2, 2))).toDF("a", "b")
 df.cube("a").agg(grouping("A")).show()`

## How was this patch tested?
Unit tests: a new SPARK-21980 test case was added to DataFrameAggregateSuite.

Author: donnyzone 

Closes #19202 from DonnyZone/ResolveGroupingAnalytics.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21c4450f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21c4450f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21c4450f

Branch: refs/heads/master
Commit: 21c4450fb24635fab6481a3756fefa9c6f6d6235
Parents: b6ef1f5
Author: donnyzone 
Authored: Wed Sep 13 10:06:53 2017 -0700
Committer: gatorsmile 
Committed: Wed Sep 13 10:06:53 2017 -0700

--
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  2 +-
 .../apache/spark/sql/DataFrameAggregateSuite.scala  | 16 
 2 files changed, 17 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/21c4450f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 1e934d0..0880bd6 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -314,7 +314,7 @@ class Analyzer(
 s"grouping columns (${groupByExprs.mkString(",")})")
   }
 case e @ Grouping(col: Expression) =>
-  val idx = groupByExprs.indexOf(col)
+  val idx = groupByExprs.indexWhere(_.semanticEquals(col))
   if (idx >= 0) {
 Alias(Cast(BitwiseAnd(ShiftRight(gid, Literal(groupByExprs.length 
- 1 - idx)),
   Literal(1)), ByteType), toPrettySQL(e))()

http://git-wip-us.apache.org/repos/asf/spark/blob/21c4450f/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index affe971..8549eac 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -190,6 +190,22 @@ class DataFrameAggregateSuite extends QueryTest with 
SharedSQLContext {
 )
   }
 
+  test("SPARK-21980: References in grouping functions should be indexed with 
semanticEquals") {
+checkAnswer(
+  courseSales.cube("course", "year")
+.agg(grouping("CouRse"), grouping("year")),
+  Row("Java", 2012, 0, 0) ::
+Row("Java", 2013, 0, 0) ::
+Row("Java", null, 0, 1) ::
+Row("dotNET", 2012, 0, 0) ::
+Row("dotNET", 2013, 0, 0) ::
+Row("dotNET", null, 0, 1) ::
+Row(null, 2012, 1, 0) ::
+Row(null, 2013, 1, 0) ::
+Row(null, null, 1, 1) :: Nil
+)
+  }
+
   test("rollup overlapping columns") {
 checkAnswer(
   testData2.rollup($"a" + $"b" as "foo", $"b" as "bar").agg(sum($"a" - 
$"b") as "foo"),


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org