Repository: spark
Updated Branches:
  refs/heads/branch-2.3 de66abafc -> 4059454f9


[SPARK-23199][SQL] improved Removes repetition from group expressions in 
Aggregate

## What changes were proposed in this pull request?

Currently, every Aggregate operator goes through 
RemoveRepetitionFromGroupExpressions and is copied. However, when there is no 
grouping expression, or there are no duplicates among the grouping expressions, 
we do not need to copy the logical plan.

## How was this patch tested?

The existing test case.

Author: caoxuewen <cao.xue...@zte.com.cn>

Closes #20375 from heary-cao/RepetitionGroupExpressions.

(cherry picked from commit 54dd7cf4ef921bc9dc12f99cfb90d1da57939901)
Signed-off-by: gatorsmile <gatorsm...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4059454f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4059454f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4059454f

Branch: refs/heads/branch-2.3
Commit: 4059454f979874caa9745861a2bcc60cac0bbffd
Parents: de66aba
Author: caoxuewen <cao.xue...@zte.com.cn>
Authored: Mon Jan 29 08:56:42 2018 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Mon Jan 29 08:56:52 2018 -0800

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala  | 8 ++++++--
 .../sql/catalyst/optimizer/AggregateOptimizeSuite.scala      | 5 ++---
 2 files changed, 8 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/4059454f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 8d20770..a28b6a0 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -1302,8 +1302,12 @@ object RemoveLiteralFromGroupExpressions extends 
Rule[LogicalPlan] {
  */
 object RemoveRepetitionFromGroupExpressions extends Rule[LogicalPlan] {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
-    case a @ Aggregate(grouping, _, _) =>
+    case a @ Aggregate(grouping, _, _) if grouping.size > 1 =>
       val newGrouping = ExpressionSet(grouping).toSeq
-      a.copy(groupingExpressions = newGrouping)
+      if (newGrouping.size == grouping.size) {
+        a
+      } else {
+        a.copy(groupingExpressions = newGrouping)
+      }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/4059454f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
index a3184a4..f8ddc93 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/AggregateOptimizeSuite.scala
@@ -67,10 +67,9 @@ class AggregateOptimizeSuite extends PlanTest {
   }
 
   test("remove repetition in grouping expression") {
-    val input = LocalRelation('a.int, 'b.int, 'c.int)
-    val query = input.groupBy('a + 1, 'b + 2, Literal(1) + 'A, Literal(2) + 
'B)(sum('c))
+    val query = testRelation.groupBy('a + 1, 'b + 2, Literal(1) + 'A, 
Literal(2) + 'B)(sum('c))
     val optimized = Optimize.execute(analyzer.execute(query))
-    val correctAnswer = input.groupBy('a + 1, 'b + 2)(sum('c)).analyze
+    val correctAnswer = testRelation.groupBy('a + 1, 'b + 2)(sum('c)).analyze
 
     comparePlans(optimized, correctAnswer)
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to