Repository: spark Updated Branches: refs/heads/branch-2.1 042e32d18 -> 23944d0d6
[SPARK-17237][SQL] Remove backticks in a pivot result schema ## What changes were proposed in this pull request? Pivoting adds backticks (e.g. 3_count(\`c\`)) in column names and, in some cases, this causes analysis exceptions like: ``` scala> val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y") scala> df.groupBy("a").pivot("x").agg(count("y"), avg("y")).na.fill(0) org.apache.spark.sql.AnalysisException: syntax error in attribute name: `3_count(`y`)`; at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$.e$1(unresolved.scala:134) at org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute$.parseAttributeName(unresolved.scala:144) ... ``` So, this PR proposes to remove these backticks from column names. ## How was this patch tested? Added a test in `DataFrameAggregateSuite`. Author: Takeshi YAMAMURO <linguin....@gmail.com> Closes #14812 from maropu/SPARK-17237. (cherry picked from commit 5585ed93b09bc05cdd7a731650eca50d43d7159b) Signed-off-by: gatorsmile <gatorsm...@gmail.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/23944d0d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/23944d0d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/23944d0d Branch: refs/heads/branch-2.1 Commit: 23944d0d64a07d29e9bfcb8f8d6d22858ec02aef Parents: 042e32d Author: Takeshi YAMAMURO <linguin....@gmail.com> Authored: Thu Jan 12 09:46:53 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Thu Jan 12 09:47:09 2017 -0800 ---------------------------------------------------------------------- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../scala/org/apache/spark/sql/DataFrameAggregateSuite.scala | 8 ++++++++ .../scala/org/apache/spark/sql/DataFramePivotSuite.scala | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/spark/blob/23944d0d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index ab9de02..f873996 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -383,7 +383,7 @@ class Analyzer( } else { val suffix = aggregate match { case n: NamedExpression => n.name - case _ => aggregate.sql + case _ => toPrettySQL(aggregate) } value + "_" + suffix } http://git-wip-us.apache.org/repos/asf/spark/blob/23944d0d/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 7853b22fe..e707912 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -530,4 +530,12 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { limit2Df.groupBy("id").count().select($"id"), limit2Df.select($"id")) } + + test("SPARK-17237 remove backticks in a pivot result schema") { + val df = Seq((2, 3, 4), (3, 4, 5)).toDF("a", "x", "y") + checkAnswer( + df.groupBy("a").pivot("x").agg(count("y"), avg("y")).na.fill(0), + Seq(Row(3, 0, 0.0, 1, 5.0), Row(2, 1, 4.0, 0, 0.0)) + ) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/23944d0d/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala ---------------------------------------------------------------------- diff 
--git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index a8d854c..51ffe34 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -200,7 +200,7 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{ test("pivot preserves aliases if given") { assertResult( - Array("year", "dotNET_foo", "dotNET_avg(`earnings`)", "Java_foo", "Java_avg(`earnings`)") + Array("year", "dotNET_foo", "dotNET_avg(earnings)", "Java_foo", "Java_avg(earnings)") )( courseSales.groupBy($"year") .pivot("course", Seq("dotNET", "Java")) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org