Repository: spark
Updated Branches:
  refs/heads/master 1202075c9 -> b72486f82


[SPARK-17458][SQL] Alias specified for aggregates in a pivot are not honored

## What changes were proposed in this pull request?

This change preserves aliases that are given for pivot aggregations

## How was this patch tested?

New unit test

Author: Andrew Ray <ray.and...@gmail.com>

Closes #15111 from aray/SPARK-17458.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b72486f8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b72486f8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b72486f8

Branch: refs/heads/master
Commit: b72486f82dd9920135442191be5d384028e7fb41
Parents: 1202075
Author: Andrew Ray <ray.and...@gmail.com>
Authored: Thu Sep 15 21:45:29 2016 +0200
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Thu Sep 15 21:45:29 2016 +0200

----------------------------------------------------------------------
 .../apache/spark/sql/catalyst/analysis/Analyzer.scala    | 10 +++++++++-
 .../scala/org/apache/spark/sql/DataFramePivotSuite.scala | 11 +++++++++++
 2 files changed, 20 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b72486f8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 92bf8e0..5210f42 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -373,7 +373,15 @@ class Analyzer(
       case Pivot(groupByExprs, pivotColumn, pivotValues, aggregates, child) =>
         val singleAgg = aggregates.size == 1
         def outputName(value: Literal, aggregate: Expression): String = {
-          if (singleAgg) value.toString else value + "_" + aggregate.sql
+          if (singleAgg) {
+            value.toString
+          } else {
+            val suffix = aggregate match {
+              case n: NamedExpression => n.name
+              case _ => aggregate.sql
+            }
+            value + "_" + suffix
+          }
         }
         if (aggregates.forall(a => PivotFirst.supportsDataType(a.dataType))) {
           // Since evaluating |pivotValues| if statements for each input row 
can get slow this is an

http://git-wip-us.apache.org/repos/asf/spark/blob/b72486f8/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
index d5cb5e1..1bbe135 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala
@@ -197,4 +197,15 @@ class DataFramePivotSuite extends QueryTest with 
SharedSQLContext{
         Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil
     )
   }
+
+  test("pivot preserves aliases if given") {
+    assertResult(
+      Array("year", "dotNET_foo", "dotNET_avg(`earnings`)", "Java_foo", 
"Java_avg(`earnings`)")
+    )(
+      courseSales.groupBy($"year")
+        .pivot("course", Seq("dotNET", "Java"))
+        .agg(sum($"earnings").as("foo"), avg($"earnings")).columns
+    )
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to