spark git commit: [SPARK-20125][SQL] Dataset of type option of map does not work
Repository: spark Updated Branches: refs/heads/branch-2.1 4bcb7d676 -> fd2e40614 [SPARK-20125][SQL] Dataset of type option of map does not work When we build the deserializer expression for map type, we will use `StaticInvoke` to call `ArrayBasedMapData.toScalaMap`, and declare the return type as `scala.collection.immutable.Map`. If the map is inside an Option, we will wrap this `StaticInvoke` with `WrapOption`, which requires the input to be `scala.collection.Map`. Ideally this should be fine, as `scala.collection.immutable.Map` extends `scala.collection.Map`, but our `ObjectType` is too strict about this; this PR fixes it. Tested with a new regression test. Author: Wenchen Fan. Closes #17454 from cloud-fan/map. (cherry picked from commit d4fac410e0554b7ccd44be44b7ce2fe07ed7f206) Signed-off-by: Cheng Lian Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd2e4061 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd2e4061 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd2e4061 Branch: refs/heads/branch-2.1 Commit: fd2e40614b511fb9ef3e52cc1351659fdbfd612a Parents: 4bcb7d6 Author: Wenchen Fan Authored: Tue Mar 28 11:47:43 2017 -0700 Committer: Cheng Lian Committed: Tue Mar 28 12:36:27 2017 -0700 -- .../src/main/scala/org/apache/spark/sql/types/ObjectType.scala | 5 + .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++ 2 files changed, 11 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fd2e4061/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala index b18fba2..2d49fe0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala @@ -44,4 +44,9 @@ case class ObjectType(cls: Class[_]) extends DataType
{ def asNullable: DataType = this override def simpleString: String = cls.getName + + override def acceptsType(other: DataType): Boolean = other match { +case ObjectType(otherCls) => cls.isAssignableFrom(otherCls) +case _ => false + } } http://git-wip-us.apache.org/repos/asf/spark/blob/fd2e4061/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 381652d..9cc49b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1072,10 +1072,16 @@ class DatasetSuite extends QueryTest with SharedSQLContext { val ds2 = Seq(WithMap("hi", Map(42L -> "foo"))).toDS checkDataset(ds2.map(t => t), WithMap("hi", Map(42L -> "foo"))) } + + test("SPARK-20125: option of map") { +val ds = Seq(WithMapInOption(Some(Map(1 -> 1)))).toDS() +checkDataset(ds, WithMapInOption(Some(Map(1 -> 1)))) + } } case class WithImmutableMap(id: String, map_test: scala.collection.immutable.Map[Long, String]) case class WithMap(id: String, map_test: scala.collection.Map[Long, String]) +case class WithMapInOption(m: Option[scala.collection.Map[Int, Int]]) case class Generic[T](id: T, value: Double) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20125][SQL] Dataset of type option of map does not work
Repository: spark Updated Branches: refs/heads/master 17eddb35a -> d4fac410e [SPARK-20125][SQL] Dataset of type option of map does not work ## What changes were proposed in this pull request? When we build the deserializer expression for map type, we will use `StaticInvoke` to call `ArrayBasedMapData.toScalaMap`, and declare the return type as `scala.collection.immutable.Map`. If the map is inside an Option, we will wrap this `StaticInvoke` with `WrapOption`, which requires the input to be `scala.collection.Map`. Ideally this should be fine, as `scala.collection.immutable.Map` extends `scala.collection.Map`, but our `ObjectType` is too strict about this; this PR fixes it. ## How was this patch tested? new regression test Author: Wenchen Fan. Closes #17454 from cloud-fan/map. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4fac410 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4fac410 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4fac410 Branch: refs/heads/master Commit: d4fac410e0554b7ccd44be44b7ce2fe07ed7f206 Parents: 17eddb3 Author: Wenchen Fan Authored: Tue Mar 28 11:47:43 2017 -0700 Committer: Cheng Lian Committed: Tue Mar 28 11:47:43 2017 -0700 -- .../src/main/scala/org/apache/spark/sql/types/ObjectType.scala | 5 + .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++ 2 files changed, 11 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4fac410/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala index b18fba2..2d49fe0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ObjectType.scala @@ -44,4 +44,9 @@ case class ObjectType(cls: Class[_]) extends DataType { def asNullable:
DataType = this override def simpleString: String = cls.getName + + override def acceptsType(other: DataType): Boolean = other match { +case ObjectType(otherCls) => cls.isAssignableFrom(otherCls) +case _ => false + } } http://git-wip-us.apache.org/repos/asf/spark/blob/d4fac410/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 6417e7a..68e071a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1154,10 +1154,16 @@ class DatasetSuite extends QueryTest with SharedSQLContext { assert(errMsg3.getMessage.startsWith("cannot have circular references in class, but got the " + "circular reference of class")) } + + test("SPARK-20125: option of map") { +val ds = Seq(WithMapInOption(Some(Map(1 -> 1)))).toDS() +checkDataset(ds, WithMapInOption(Some(Map(1 -> 1)))) + } } case class WithImmutableMap(id: String, map_test: scala.collection.immutable.Map[Long, String]) case class WithMap(id: String, map_test: scala.collection.Map[Long, String]) +case class WithMapInOption(m: Option[scala.collection.Map[Int, Int]]) case class Generic[T](id: T, value: Double) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org