spark git commit: [SPARK-17335][SQL] Fix ArrayType and MapType CatalogString.

2016-09-03 Thread hvanhovell
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 3500dbc9b -> 704215d30


[SPARK-17335][SQL] Fix ArrayType and MapType CatalogString.

## What changes were proposed in this pull request?
the `catalogString` for `ArrayType` and `MapType` currently calls the 
`simpleString` method on its children. This is a problem when the child is a 
struct, the `struct.simpleString` implementation truncates the number of fields 
it shows (25 at max). This breaks the generation of a proper `catalogString`, 
and has shown to cause errors while writing to Hive.

This PR fixes this by providing proper `catalogString` implementations for 
`ArrayData` or `MapData`.

## How was this patch tested?
Added testing for `catalogString` to `DataTypeSuite`.

Author: Herman van Hovell 

Closes #14938 from hvanhovell/SPARK-17335.

(cherry picked from commit c2a1576c230697f56f282b6388c79835377e0f2f)
Signed-off-by: Herman van Hovell 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/704215d3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/704215d3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/704215d3

Branch: refs/heads/branch-2.0
Commit: 704215d3055bad7957d1d6da1a1a526c0d27d37d
Parents: 3500dbc
Author: Herman van Hovell 
Authored: Sat Sep 3 19:02:20 2016 +0200
Committer: Herman van Hovell 
Committed: Sat Sep 3 19:02:35 2016 +0200

--
 .../org/apache/spark/sql/types/ArrayType.scala  |   2 +
 .../org/apache/spark/sql/types/MapType.scala|   2 +
 .../apache/spark/sql/types/DataTypeSuite.scala  |  30 
 .../benchmarks/WideSchemaBenchmark-results.txt  | 174 +++
 4 files changed, 133 insertions(+), 75 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/704215d3/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
index 520e344..82a03b0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
@@ -77,6 +77,8 @@ case class ArrayType(elementType: DataType, containsNull: 
Boolean) extends DataT
 
   override def simpleString: String = s"array<${elementType.simpleString}>"
 
+  override def catalogString: String = s"array<${elementType.catalogString}>"
+
   override def sql: String = s"ARRAY<${elementType.sql}>"
 
   override private[spark] def asNullable: ArrayType =

http://git-wip-us.apache.org/repos/asf/spark/blob/704215d3/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
index 454ea40..1789609 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
@@ -64,6 +64,8 @@ case class MapType(
 
   override def simpleString: String = 
s"map<${keyType.simpleString},${valueType.simpleString}>"
 
+  override def catalogString: String = 
s"map<${keyType.catalogString},${valueType.catalogString}>"
+
   override def sql: String = s"MAP<${keyType.sql}, ${valueType.sql}>"
 
   override private[spark] def asNullable: MapType =

http://git-wip-us.apache.org/repos/asf/spark/blob/704215d3/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 6b85f12..569230a 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.types
 
 import org.apache.spark.{SparkException, SparkFunSuite}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 
 class DataTypeSuite extends SparkFunSuite {
 
@@ -342,4 +343,33 @@ class DataTypeSuite extends SparkFunSuite {
   StructField("a", StringType, nullable = false) ::
   StructField("b", StringType, nullable = false) :: Nil),
 expected = false)
+
+  def checkCatalogString(dt: DataType): Unit = {
+test(s"catalogString: $dt") {
+  val dt2 = CatalystSqlParser.parseDataType(dt.catalogString)
+  assert(dt === dt2)
+}
+  }
+  def 

spark git commit: [SPARK-17335][SQL] Fix ArrayType and MapType CatalogString.

2016-09-03 Thread hvanhovell
Repository: spark
Updated Branches:
  refs/heads/master a8a35b39b -> c2a1576c2


[SPARK-17335][SQL] Fix ArrayType and MapType CatalogString.

## What changes were proposed in this pull request?
the `catalogString` for `ArrayType` and `MapType` currently calls the 
`simpleString` method on its children. This is a problem when the child is a 
struct, the `struct.simpleString` implementation truncates the number of fields 
it shows (25 at max). This breaks the generation of a proper `catalogString`, 
and has shown to cause errors while writing to Hive.

This PR fixes this by providing proper `catalogString` implementations for 
`ArrayData` or `MapData`.

## How was this patch tested?
Added testing for `catalogString` to `DataTypeSuite`.

Author: Herman van Hovell 

Closes #14938 from hvanhovell/SPARK-17335.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2a1576c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2a1576c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2a1576c

Branch: refs/heads/master
Commit: c2a1576c230697f56f282b6388c79835377e0f2f
Parents: a8a35b3
Author: Herman van Hovell 
Authored: Sat Sep 3 19:02:20 2016 +0200
Committer: Herman van Hovell 
Committed: Sat Sep 3 19:02:20 2016 +0200

--
 .../org/apache/spark/sql/types/ArrayType.scala  |   2 +
 .../org/apache/spark/sql/types/MapType.scala|   2 +
 .../apache/spark/sql/types/DataTypeSuite.scala  |  30 
 .../benchmarks/WideSchemaBenchmark-results.txt  | 174 +++
 4 files changed, 133 insertions(+), 75 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c2a1576c/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
index 520e344..82a03b0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala
@@ -77,6 +77,8 @@ case class ArrayType(elementType: DataType, containsNull: 
Boolean) extends DataT
 
   override def simpleString: String = s"array<${elementType.simpleString}>"
 
+  override def catalogString: String = s"array<${elementType.catalogString}>"
+
   override def sql: String = s"ARRAY<${elementType.sql}>"
 
   override private[spark] def asNullable: ArrayType =

http://git-wip-us.apache.org/repos/asf/spark/blob/c2a1576c/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
index 454ea40..1789609 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/MapType.scala
@@ -64,6 +64,8 @@ case class MapType(
 
   override def simpleString: String = 
s"map<${keyType.simpleString},${valueType.simpleString}>"
 
+  override def catalogString: String = 
s"map<${keyType.catalogString},${valueType.catalogString}>"
+
   override def sql: String = s"MAP<${keyType.sql}, ${valueType.sql}>"
 
   override private[spark] def asNullable: MapType =

http://git-wip-us.apache.org/repos/asf/spark/blob/c2a1576c/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
index 688bc3e..b8ab9a9 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/types/DataTypeSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.types
 
 import org.apache.spark.{SparkException, SparkFunSuite}
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 
 class DataTypeSuite extends SparkFunSuite {
 
@@ -359,4 +360,33 @@ class DataTypeSuite extends SparkFunSuite {
   StructField("a", StringType, nullable = false) ::
   StructField("b", StringType, nullable = false) :: Nil),
 expected = false)
+
+  def checkCatalogString(dt: DataType): Unit = {
+test(s"catalogString: $dt") {
+  val dt2 = CatalystSqlParser.parseDataType(dt.catalogString)
+  assert(dt === dt2)
+}
+  }
+  def createStruct(n: Int): StructType = new StructType(Array.tabulate(n) {
+i => StructField(s"col$i", IntegerType, nullable = true)
+  })
+