Repository: spark Updated Branches: refs/heads/master 2c5b18fb0 -> 17a3f0067
[SPARK-14000][SQL] case class with a tuple field can't work in Dataset ## What changes were proposed in this pull request? When we validate an encoder, we may call `dataType` on unresolved expressions. This PR fixes the validation so that we will resolve attributes first. ## How was this patch tested? a new test in `DatasetSuite` Author: Wenchen Fan <wenc...@databricks.com> Closes #11816 from cloud-fan/encoder. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/17a3f006 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/17a3f006 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/17a3f006 Branch: refs/heads/master Commit: 17a3f00676ca02155557f6ee55a1565e96893792 Parents: 2c5b18f Author: Wenchen Fan <wenc...@databricks.com> Authored: Mon Mar 21 22:22:15 2016 +0800 Committer: Cheng Lian <l...@databricks.com> Committed: Mon Mar 21 22:22:15 2016 +0800 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 ++++- .../sql/catalyst/encoders/ExpressionEncoder.scala | 10 ++++++++-- .../catalyst/expressions/complexTypeExtractors.scala | 7 ++++++- .../test/scala/org/apache/spark/sql/DatasetSuite.scala | 13 +++++++++++-- 4 files changed, 29 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/17a3f006/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 333a54e..ccc65b4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ 
-600,7 +600,10 @@ class Analyzer( exprs.exists(_.collect { case _: Star => true }.nonEmpty) } - private def resolveExpression(expr: Expression, plan: LogicalPlan, throws: Boolean = false) = { + protected[sql] def resolveExpression( + expr: Expression, + plan: LogicalPlan, + throws: Boolean = false) = { // Resolve expression in one round. // If throws == false or the desired attribute doesn't exist // (like try to resolve `a.b` but `a` doesn't exist), fail and return the origin one. http://git-wip-us.apache.org/repos/asf/spark/blob/17a3f006/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index 58f6d0e..918233d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -282,8 +282,14 @@ case class ExpressionEncoder[T]( // If we have nested tuple, the `fromRowExpression` will contains `GetStructField` instead of // `UnresolvedExtractValue`, so we need to check if their ordinals are all valid. // Note that, `BoundReference` contains the expected type, but here we need the actual type, so - // we unbound it by the given `schema` and propagate the actual type to `GetStructField`. - val unbound = fromRowExpression transform { + // we unbound it by the given `schema` and propagate the actual type to `GetStructField`, after + // we resolve the `fromRowExpression`. 
+ val resolved = SimpleAnalyzer.resolveExpression( + fromRowExpression, + LocalRelation(schema), + throws = true) + + val unbound = resolved transform { case b: BoundReference => schema(b.ordinal) } http://git-wip-us.apache.org/repos/asf/spark/blob/17a3f006/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index 44cdc8d..c06dcc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -110,7 +110,12 @@ case class GetStructField(child: Expression, ordinal: Int, name: Option[String] override def dataType: DataType = childSchema(ordinal).dataType override def nullable: Boolean = child.nullable || childSchema(ordinal).nullable - override def toString: String = s"$child.${name.getOrElse(childSchema(ordinal).name)}" + + override def toString: String = { + val fieldName = if (resolved) childSchema(ordinal).name else s"_$ordinal" + s"$child.${name.getOrElse(fieldName)}" + } + override def sql: String = child.sql + s".${quoteIdentifier(name.getOrElse(childSchema(ordinal).name))}" http://git-wip-us.apache.org/repos/asf/spark/blob/17a3f006/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index d7fa236..04d3a25 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -27,8 +27,6 
@@ import org.apache.spark.sql.functions._ import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType} -case class OtherTuple(_1: String, _2: Int) - class DatasetSuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -636,8 +634,19 @@ class DatasetSuite extends QueryTest with SharedSQLContext { Seq(OuterObject.InnerClass("foo")).toDS(), OuterObject.InnerClass("foo")) } + + test("SPARK-14000: case class with tuple type field") { + checkDataset( + Seq(TupleClass((1, "a"))).toDS(), + TupleClass(1, "a") + ) + } } +case class OtherTuple(_1: String, _2: Int) + +case class TupleClass(data: (Int, String)) + class OuterClass extends Serializable { case class InnerClass(a: String) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org