Repository: spark Updated Branches: refs/heads/master eb10b481c -> 61b80d552
[SPARK-15547][SQL] nested case class in encoder can have different number of fields from the real schema ## What changes were proposed in this pull request? There are 2 kinds of `GetStructField`: 1. one resolved from `UnresolvedExtractValue`, which has a `name` property. 2. one created when we build the deserializer expression for a nested tuple, which has no `name` property. When we validate the ordinals of a nested tuple, we should only collect `GetStructField`s without the `name` property, so that a nested case class in the encoder can have a different number of fields from the real schema. ## How was this patch tested? A new test in `EncoderResolutionSuite`. Author: Wenchen Fan <wenc...@databricks.com> Closes #13474 from cloud-fan/ordinal-check. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/61b80d55 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/61b80d55 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/61b80d55 Branch: refs/heads/master Commit: 61b80d552aafb262b5f817f7bc9c0acd0328715b Parents: eb10b48 Author: Wenchen Fan <wenc...@databricks.com> Authored: Fri Jun 3 14:26:24 2016 -0700 Committer: Cheng Lian <l...@databricks.com> Committed: Fri Jun 3 14:26:24 2016 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 7 ++++++- .../spark/sql/catalyst/encoders/EncoderResolutionSuite.scala | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/61b80d55/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 4f6b483..0e68656 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala 
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1964,7 +1964,12 @@ class Analyzer( */ private def validateNestedTupleFields(deserializer: Expression): Unit = { val structChildToOrdinals = deserializer - .collect { case g: GetStructField => g } + // There are 2 kinds of `GetStructField`: + // 1. resolved from `UnresolvedExtractValue`, and it will have a `name` property. + // 2. created when we build deserializer expression for nested tuple, no `name` property. + // Here we want to validate the ordinals of nested tuple, so we should only catch + // `GetStructField` without the name property. + .collect { case g: GetStructField if g.name.isEmpty => g } .groupBy(_.child) .mapValues(_.map(_.ordinal).distinct.sorted) http://git-wip-us.apache.org/repos/asf/spark/blob/61b80d55/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index 7251202..802397d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -115,6 +115,12 @@ class EncoderResolutionSuite extends PlanTest { } } + test("nested case class can have different number of fields from the real schema") { + val encoder = ExpressionEncoder[(String, StringIntClass)] + val attrs = Seq('a.string, 'b.struct('a.string, 'b.int, 'c.int)) + encoder.resolveAndBind(attrs) + } + test("throw exception if real type is not compatible with encoder schema") { val msg1 = intercept[AnalysisException] { ExpressionEncoder[StringIntClass].resolveAndBind(Seq('a.string, 'b.long)) 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org