Repository: spark Updated Branches: refs/heads/branch-1.6 975ae4e6f -> be2c18650
[SPARK-10656][SQL] completely support special chars in DataFrame. The main problem is that we interpret column names with special handling of `.` in DataFrame. This enables us to write something like `df("a.b")` to get the field `b` of `a`. However, we don't need this feature in `DataFrame.apply("*")` or `DataFrame.withColumnRenamed`. In these 2 cases, the column name is already the final name, so we don't need any extra processing to interpret it. The solution is simple: use `queryExecution.analyzed.output` to get the resolved columns directly, instead of using `DataFrame.resolve`. close https://github.com/apache/spark/pull/8811 Author: Wenchen Fan <wenc...@databricks.com> Closes #9462 from cloud-fan/special-chars. (cherry picked from commit d9e30c59cede7f57786bb19e64ba422eda43bdcb) Signed-off-by: Yin Huai <yh...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be2c1865 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be2c1865 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be2c1865 Branch: refs/heads/branch-1.6 Commit: be2c186500bc4225a4aba89f9724443ed8721234 Parents: 975ae4e Author: Wenchen Fan <wenc...@databricks.com> Authored: Thu Nov 5 14:53:16 2015 -0800 Committer: Yin Huai <yh...@databricks.com> Committed: Thu Nov 5 14:53:26 2015 -0800 ---------------------------------------------------------------------- .../main/scala/org/apache/spark/sql/DataFrame.scala | 16 ++++++++++------ .../scala/org/apache/spark/sql/DataFrameSuite.scala | 6 ++++++ 2 files changed, 16 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/be2c1865/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 6336dee..f2d4db5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -698,7 +698,7 @@ class DataFrame private[sql]( */ def col(colName: String): Column = colName match { case "*" => - Column(ResolvedStar(schema.fieldNames.map(resolve))) + Column(ResolvedStar(queryExecution.analyzed.output)) case _ => val expr = resolve(colName) Column(expr) @@ -1259,13 +1259,17 @@ class DataFrame private[sql]( */ def withColumnRenamed(existingName: String, newName: String): DataFrame = { val resolver = sqlContext.analyzer.resolver - val shouldRename = schema.exists(f => resolver(f.name, existingName)) + val output = queryExecution.analyzed.output + val shouldRename = output.exists(f => resolver(f.name, existingName)) if (shouldRename) { - val colNames = schema.map { field => - val name = field.name - if (resolver(name, existingName)) Column(name).as(newName) else Column(name) + val columns = output.map { col => + if (resolver(col.name, existingName)) { + Column(col).as(newName) + } else { + Column(col) + } } - select(colNames : _*) + select(columns : _*) } else { this } http://git-wip-us.apache.org/repos/asf/spark/blob/be2c1865/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 84a616d..f3a7aa2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -1128,4 +1128,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { } } } + + test("SPARK-10656: completely support special chars") { + val df = Seq(1 -> "a").toDF("i_$.a", "d^'a.") + checkAnswer(df.select(df("*")), Row(1, "a")) + 
checkAnswer(df.withColumnRenamed("d^'a.", "a"), Row(1, "a")) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org