[ 
https://issues.apache.org/jira/browse/SPARK-20866?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16024350#comment-16024350
 ] 

Takeshi Yamamuro commented on SPARK-20866:
------------------------------------------

SPARK-18284, which [~kiszk] fixed, also resolved this issue:
{code}

-- w/o the SPARK-18284 patch
scala> case class Test(a: Int)
defined class Test

scala> val ds1 = (Test(10) :: Nil).toDS
ds1: org.apache.spark.sql.Dataset[Test] = [a: int]

scala> val ds2 = ds1.map(x => Test(x.a))
ds2: org.apache.spark.sql.Dataset[Test] = [a: int]

scala> ds1.schema == ds2.schema
res0: Boolean = false

scala> ds1.schema
res1: org.apache.spark.sql.types.StructType = 
StructType(StructField(a,IntegerType,false))

scala> ds2.schema
res2: org.apache.spark.sql.types.StructType = 
StructType(StructField(a,IntegerType,true))


-- w/ the SPARK-18284 patch
scala> case class Test(a: Int)
defined class Test

scala> val ds1 = (Test(10) :: Nil).toDS
ds1: org.apache.spark.sql.Dataset[Test] = [a: int]

scala> val ds2 = ds1.map(x => Test(x.a))
ds2: org.apache.spark.sql.Dataset[Test] = [a: int]

scala> ds1.schema == ds2.schema
res0: Boolean = true

scala> ds1.schema
res1: org.apache.spark.sql.types.StructType = 
StructType(StructField(a,IntegerType,false))

scala> ds2.schema
res2: org.apache.spark.sql.types.StructType = 
StructType(StructField(a,IntegerType,false))
{code}

> Dataset map does not respect nullable field 
> --------------------------------------------
>
>                 Key: SPARK-20866
>                 URL: https://issues.apache.org/jira/browse/SPARK-20866
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.1.0
>            Reporter: Colin Breame
>
> The Dataset.map does not respect the nullable fields within the schema. 
> *Test code:*
> (run on spark-shell 2.1.0):
> {code}
> scala> case class Test(a: Int)
> defined class Test
> scala> val ds1 = (Test(10) :: Nil).toDS
> ds1: org.apache.spark.sql.Dataset[Test] = [a: int]
> scala> val ds2 = ds1.map(x => Test(x.a))
> ds2: org.apache.spark.sql.Dataset[Test] = [a: int]
> scala> ds1.schema == ds2.schema
> res65: Boolean = false
> scala> ds1.schema
> res62: org.apache.spark.sql.types.StructType = 
> StructType(StructField(a,IntegerType,false))
> scala> ds2.schema
> res63: org.apache.spark.sql.types.StructType = 
> StructType(StructField(a,IntegerType,true))
> {code}
> *Expected*
> The schema of ds1 should equal the schema of ds2, i.e. the schemas should be the same.
> *Actual*
> The schemas are not equal - the StructField nullable property is true in ds2 
> and false in ds1.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to