spark git commit: [SPARK-3365][SQL]Wrong schema generated for List type
Repository: spark Updated Branches: refs/heads/master 2aea892eb -> 1c8633f3f [SPARK-3365][SQL]Wrong schema generated for List type This PR fixes the issue SPARK-3365. The cause is that Spark generated the wrong schema for the type `List` in `ScalaReflection.scala`: a `List` matches both the `Seq` case and the `Product` case, and the `Product` case was tried first, so it was treated as a struct. For example, the generated schema for type `Seq[String]` is: ``` {name:x,type:{type:array,elementType:string,containsNull:true},nullable:true,metadata:{}} ``` while the generated schema for type `List[String]` is: ``` {name:x,type:{type:struct,fields:[]},nullable:true,metadata:{}} ``` Author: tianyi tianyi.asiai...@gmail.com Closes #4581 from tianyi/SPARK-3365 and squashes the following commits: a097e86 [tianyi] change the order of resolution in ScalaReflection.scala Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c8633f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c8633f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c8633f3 Branch: refs/heads/master Commit: 1c8633f3fe9d814c83384e339b958740c250c00c Parents: 2aea892 Author: tianyi tianyi.asiai...@gmail.com Authored: Thu Feb 12 22:18:39 2015 -0800 Committer: Cheng Lian l...@databricks.com Committed: Thu Feb 12 22:18:39 2015 -0800 -- .../spark/sql/catalyst/ScalaReflection.scala| 30 ++-- .../sql/catalyst/ScalaReflectionSuite.scala | 5 2 files changed, 20 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1c8633f3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 5d9c331..11fd443 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -122,6 +122,21 @@ trait ScalaReflection { case t if t <:< typeOf[Option[_]] => val 
TypeRef(_, _, Seq(optType)) = t Schema(schemaFor(optType).dataType, nullable = true) + // Need to decide if we actually need a special type here. + case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) + case t if t <:< typeOf[Array[_]] => +val TypeRef(_, _, Seq(elementType)) = t +val Schema(dataType, nullable) = schemaFor(elementType) +Schema(ArrayType(dataType, containsNull = nullable), nullable = true) + case t if t <:< typeOf[Seq[_]] => +val TypeRef(_, _, Seq(elementType)) = t +val Schema(dataType, nullable) = schemaFor(elementType) +Schema(ArrayType(dataType, containsNull = nullable), nullable = true) + case t if t <:< typeOf[Map[_, _]] => +val TypeRef(_, _, Seq(keyType, valueType)) = t +val Schema(valueDataType, valueNullable) = schemaFor(valueType) +Schema(MapType(schemaFor(keyType).dataType, + valueDataType, valueContainsNull = valueNullable), nullable = true) case t if t <:< typeOf[Product] => val formalTypeArgs = t.typeSymbol.asClass.typeParams val TypeRef(_, _, actualTypeArgs) = t @@ -144,21 +159,6 @@ trait ScalaReflection { schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs)) StructField(p.name.toString, dataType, nullable) }), nullable = true) - // Need to decide if we actually need a special type here. 
- case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) - case t if t <:< typeOf[Array[_]] => -val TypeRef(_, _, Seq(elementType)) = t -val Schema(dataType, nullable) = schemaFor(elementType) -Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Seq[_]] => -val TypeRef(_, _, Seq(elementType)) = t -val Schema(dataType, nullable) = schemaFor(elementType) -Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Map[_, _]] => -val TypeRef(_, _, Seq(keyType, valueType)) = t -val Schema(valueDataType, valueNullable) = schemaFor(valueType) -Schema(MapType(schemaFor(keyType).dataType, - valueDataType, valueContainsNull = valueNullable), nullable = true) case t if t <:< typeOf[String] => Schema(StringType, nullable = true) case t if t <:< typeOf[Timestamp] => Schema(TimestampType, nullable = true) case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true) http://git-wip-us.apache.org/repos/asf/spark/blob/1c8633f3/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/ScalaReflectionSuite.scala
spark git commit: [SPARK-3365][SQL]Wrong schema generated for List type
Repository: spark Updated Branches: refs/heads/branch-1.3 edbac178d -> b9f332ab6 [SPARK-3365][SQL]Wrong schema generated for List type This PR fixes the issue SPARK-3365. The cause is that Spark generated the wrong schema for the type `List` in `ScalaReflection.scala`: a `List` matches both the `Seq` case and the `Product` case, and the `Product` case was tried first, so it was treated as a struct. For example, the generated schema for type `Seq[String]` is: ``` {name:x,type:{type:array,elementType:string,containsNull:true},nullable:true,metadata:{}} ``` while the generated schema for type `List[String]` is: ``` {name:x,type:{type:struct,fields:[]},nullable:true,metadata:{}} ``` Author: tianyi tianyi.asiai...@gmail.com Closes #4581 from tianyi/SPARK-3365 and squashes the following commits: a097e86 [tianyi] change the order of resolution in ScalaReflection.scala (cherry picked from commit 1c8633f3fe9d814c83384e339b958740c250c00c) Signed-off-by: Cheng Lian l...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9f332ab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9f332ab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9f332ab Branch: refs/heads/branch-1.3 Commit: b9f332ab680f671a368a8411679bb4c52d495486 Parents: edbac17 Author: tianyi tianyi.asiai...@gmail.com Authored: Thu Feb 12 22:18:39 2015 -0800 Committer: Cheng Lian l...@databricks.com Committed: Thu Feb 12 22:19:01 2015 -0800 -- .../spark/sql/catalyst/ScalaReflection.scala| 30 ++-- .../sql/catalyst/ScalaReflectionSuite.scala | 5 2 files changed, 20 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b9f332ab/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 5d9c331..11fd443 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -122,6 +122,21 @@ trait ScalaReflection { case t if t <:< typeOf[Option[_]] => val TypeRef(_, _, Seq(optType)) = t Schema(schemaFor(optType).dataType, nullable = true) + // Need to decide if we actually need a special type here. + case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) + case t if t <:< typeOf[Array[_]] => +val TypeRef(_, _, Seq(elementType)) = t +val Schema(dataType, nullable) = schemaFor(elementType) +Schema(ArrayType(dataType, containsNull = nullable), nullable = true) + case t if t <:< typeOf[Seq[_]] => +val TypeRef(_, _, Seq(elementType)) = t +val Schema(dataType, nullable) = schemaFor(elementType) +Schema(ArrayType(dataType, containsNull = nullable), nullable = true) + case t if t <:< typeOf[Map[_, _]] => +val TypeRef(_, _, Seq(keyType, valueType)) = t +val Schema(valueDataType, valueNullable) = schemaFor(valueType) +Schema(MapType(schemaFor(keyType).dataType, + valueDataType, valueContainsNull = valueNullable), nullable = true) case t if t <:< typeOf[Product] => val formalTypeArgs = t.typeSymbol.asClass.typeParams val TypeRef(_, _, actualTypeArgs) = t @@ -144,21 +159,6 @@ trait ScalaReflection { schemaFor(p.typeSignature.substituteTypes(formalTypeArgs, actualTypeArgs)) StructField(p.name.toString, dataType, nullable) }), nullable = true) - // Need to decide if we actually need a special type here. 
- case t if t <:< typeOf[Array[Byte]] => Schema(BinaryType, nullable = true) - case t if t <:< typeOf[Array[_]] => -val TypeRef(_, _, Seq(elementType)) = t -val Schema(dataType, nullable) = schemaFor(elementType) -Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Seq[_]] => -val TypeRef(_, _, Seq(elementType)) = t -val Schema(dataType, nullable) = schemaFor(elementType) -Schema(ArrayType(dataType, containsNull = nullable), nullable = true) - case t if t <:< typeOf[Map[_, _]] => -val TypeRef(_, _, Seq(keyType, valueType)) = t -val Schema(valueDataType, valueNullable) = schemaFor(valueType) -Schema(MapType(schemaFor(keyType).dataType, - valueDataType, valueContainsNull = valueNullable), nullable = true) case t if t <:< typeOf[String] => Schema(StringType, nullable = true) case t if t <:< typeOf[Timestamp] => Schema(TimestampType, nullable = true) case t if t <:< typeOf[java.sql.Date] => Schema(DateType, nullable = true)