Repository: spark Updated Branches: refs/heads/master 1202075c9 -> b72486f82
[SPARK-17458][SQL] Alias specified for aggregates in a pivot are not honored ## What changes were proposed in this pull request? This change preserves aliases that are given for pivot aggregations ## How was this patch tested? New unit test Author: Andrew Ray <ray.and...@gmail.com> Closes #15111 from aray/SPARK-17458. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b72486f8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b72486f8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b72486f8 Branch: refs/heads/master Commit: b72486f82dd9920135442191be5d384028e7fb41 Parents: 1202075 Author: Andrew Ray <ray.and...@gmail.com> Authored: Thu Sep 15 21:45:29 2016 +0200 Committer: Herman van Hovell <hvanhov...@databricks.com> Committed: Thu Sep 15 21:45:29 2016 +0200 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 10 +++++++++- .../scala/org/apache/spark/sql/DataFramePivotSuite.scala | 11 +++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/b72486f8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 92bf8e0..5210f42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -373,7 +373,15 @@ class Analyzer( case Pivot(groupByExprs, pivotColumn, pivotValues, aggregates, child) => val singleAgg = aggregates.size == 1 def outputName(value: Literal, aggregate: Expression): String = { - if (singleAgg) value.toString else value + "_" + aggregate.sql + if (singleAgg) { + value.toString + } else { + val suffix = aggregate match { + case n: NamedExpression => n.name + case _ => aggregate.sql + } + value + "_" + suffix + } } if (aggregates.forall(a => PivotFirst.supportsDataType(a.dataType))) { // Since evaluating |pivotValues| if statements for each input row can get slow this is an http://git-wip-us.apache.org/repos/asf/spark/blob/b72486f8/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index d5cb5e1..1bbe135 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -197,4 +197,15 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{ Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil ) } + + test("pivot preserves aliases if given") { + assertResult( + Array("year", "dotNET_foo", "dotNET_avg(`earnings`)", "Java_foo", "Java_avg(`earnings`)") + )( + courseSales.groupBy($"year") + .pivot("course", Seq("dotNET", "Java")) + .agg(sum($"earnings").as("foo"), avg($"earnings")).columns + ) + } + } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org