spark git commit: [SPARK-17335][SQL] Fix ArrayType and MapType CatalogString.
Repository: spark Updated Branches: refs/heads/branch-2.0 3500dbc9b -> 704215d30 [SPARK-17335][SQL] Fix ArrayType and MapType CatalogString. ## What changes were proposed in this pull request? the `catalogString` for `ArrayType` and `MapType` currently calls the `simpleString` method on its children. This is a problem when the child is a struct, the `struct.simpleString` implementation truncates the number of fields it shows (25 at max). This breaks the generation of a proper `catalogString`, and has shown to cause errors while writing to Hive. This PR fixes this by providing proper `catalogString` implementations for `ArrayData` or `MapData`. ## How was this patch tested? Added testing for `catalogString` to `DataTypeSuite`. Author: Herman van HovellCloses #14938 from hvanhovell/SPARK-17335. (cherry picked from commit c2a1576c230697f56f282b6388c79835377e0f2f) Signed-off-by: Herman van Hovell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/704215d3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/704215d3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/704215d3 Branch: refs/heads/branch-2.0 Commit: 704215d3055bad7957d1d6da1a1a526c0d27d37d Parents: 3500dbc Author: Herman van Hovell Authored: Sat Sep 3 19:02:20 2016 +0200 Committer: Herman van Hovell Committed: Sat Sep 3 19:02:35 2016 +0200 -- .../org/apache/spark/sql/types/ArrayType.scala | 2 + .../org/apache/spark/sql/types/MapType.scala| 2 + .../apache/spark/sql/types/DataTypeSuite.scala | 30 .../benchmarks/WideSchemaBenchmark-results.txt | 174 +++ 4 files changed, 133 insertions(+), 75 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/704215d3/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala index 520e344..82a03b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala @@ -77,6 +77,8 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT override def simpleString: String = s"array<${elementType.simpleString}>" + override def catalogString: String = s"array<${elementType.catalogString}>" + override def sql: String = s"ARRAY<${elementType.sql}>" override private[spark] def asNullable: ArrayType = http://git-wip-us.apache.org/repos/asf/spark/blob/704215d3/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala index 454ea40..1789609 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala @@ -64,6 +64,8 @@ case class MapType( override def simpleString: String = s"map<${keyType.simpleString},${valueType.simpleString}>" + override def catalogString: String = s"map<${keyType.catalogString},${valueType.catalogString}>" + override def sql: String = s"MAP<${keyType.sql}, ${valueType.sql}>" override private[spark] def asNullable: MapType = http://git-wip-us.apache.org/repos/asf/spark/blob/704215d3/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 6b85f12..569230a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.types import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser class DataTypeSuite extends SparkFunSuite { @@ -342,4 +343,33 @@ class DataTypeSuite extends SparkFunSuite { StructField("a", StringType, nullable = false) :: StructField("b", StringType, nullable = false) :: Nil), expected = false) + + def checkCatalogString(dt: DataType): Unit = { +test(s"catalogString: $dt") { + val dt2 = CatalystSqlParser.parseDataType(dt.catalogString) + assert(dt === dt2) +} + } + def
spark git commit: [SPARK-17335][SQL] Fix ArrayType and MapType CatalogString.
Repository: spark Updated Branches: refs/heads/master a8a35b39b -> c2a1576c2 [SPARK-17335][SQL] Fix ArrayType and MapType CatalogString. ## What changes were proposed in this pull request? the `catalogString` for `ArrayType` and `MapType` currently calls the `simpleString` method on its children. This is a problem when the child is a struct, the `struct.simpleString` implementation truncates the number of fields it shows (25 at max). This breaks the generation of a proper `catalogString`, and has shown to cause errors while writing to Hive. This PR fixes this by providing proper `catalogString` implementations for `ArrayData` or `MapData`. ## How was this patch tested? Added testing for `catalogString` to `DataTypeSuite`. Author: Herman van HovellCloses #14938 from hvanhovell/SPARK-17335. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2a1576c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2a1576c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2a1576c Branch: refs/heads/master Commit: c2a1576c230697f56f282b6388c79835377e0f2f Parents: a8a35b3 Author: Herman van Hovell Authored: Sat Sep 3 19:02:20 2016 +0200 Committer: Herman van Hovell Committed: Sat Sep 3 19:02:20 2016 +0200 -- .../org/apache/spark/sql/types/ArrayType.scala | 2 + .../org/apache/spark/sql/types/MapType.scala| 2 + .../apache/spark/sql/types/DataTypeSuite.scala | 30 .../benchmarks/WideSchemaBenchmark-results.txt | 174 +++ 4 files changed, 133 insertions(+), 75 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c2a1576c/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala index 520e344..82a03b0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala @@ -77,6 +77,8 @@ case class ArrayType(elementType: DataType, containsNull: Boolean) extends DataT override def simpleString: String = s"array<${elementType.simpleString}>" + override def catalogString: String = s"array<${elementType.catalogString}>" + override def sql: String = s"ARRAY<${elementType.sql}>" override private[spark] def asNullable: ArrayType = http://git-wip-us.apache.org/repos/asf/spark/blob/c2a1576c/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala index 454ea40..1789609 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala @@ -64,6 +64,8 @@ case class MapType( override def simpleString: String = s"map<${keyType.simpleString},${valueType.simpleString}>" + override def catalogString: String = s"map<${keyType.catalogString},${valueType.catalogString}>" + override def sql: String = s"MAP<${keyType.sql}, ${valueType.sql}>" override private[spark] def asNullable: MapType = http://git-wip-us.apache.org/repos/asf/spark/blob/c2a1576c/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala index 688bc3e..b8ab9a9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala @@ -18,6 +18,7 @@ package org.apache.spark.sql.types import org.apache.spark.{SparkException, SparkFunSuite} +import org.apache.spark.sql.catalyst.parser.CatalystSqlParser class DataTypeSuite extends SparkFunSuite { @@ -359,4 +360,33 @@ class DataTypeSuite extends SparkFunSuite { StructField("a", StringType, nullable = false) :: StructField("b", StringType, nullable = false) :: Nil), expected = false) + + def checkCatalogString(dt: DataType): Unit = { +test(s"catalogString: $dt") { + val dt2 = CatalystSqlParser.parseDataType(dt.catalogString) + assert(dt === dt2) +} + } + def createStruct(n: Int): StructType = new StructType(Array.tabulate(n) { +i => StructField(s"col$i", IntegerType, nullable = true) + }) +