[ https://issues.apache.org/jira/browse/SPARK-22935?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Kazuaki Ishizaki updated SPARK-22935: ------------------------------------- Description: The following code prints {{cnt=0}}. The value of {{c}} must be 2. {{ds}} must not be {{empty}}. This occurs both with and without whole-stage codegen. {code} public void SPARK22935() { Dataset<CDR> cdr = spark .read() .format("csv") .option("header", "true") .option("inferSchema", "true") .option("delimiter", ";") .csv("CDR_SAMPLE.csv") .as(Encoders.bean(CDR.class)); Dataset<CDR> ds = cdr.filter((FilterFunction<CDR>) x -> (x.timestamp != null)); long c = ds.count(); cdr.show(2); ds.show(2); System.out.println("cnt=" + c); } // CDR.java public class CDR implements java.io.Serializable { public java.sql.Date timestamp; } // CDR_SAMPLE.csv timestamp 2017-10-29T02:37:07.815Z 2017-10-29T02:38:07.815Z {code} result {code} +--------------------+ | timestamp| +--------------------+ |2017-10-29 11:37:...| |2017-10-29 11:38:...| +--------------------+ +---------+ |timestamp| +---------+ +---------+ cnt=0 {code} was: The following code prints {{c=0}}. The value of {{c}} must be 2. {ds} must not be {{empty}}. 
{code} public void SPARK22935() { Dataset<CDR> cdr = spark .read() .format("csv") .option("header", "true") .option("inferSchema", "true") .option("delimiter", ";") .csv("CDR_SAMPLE.csv") .as(Encoders.bean(CDR.class)); Dataset<CDR> ds = cdr.filter((FilterFunction<CDR>) x -> (x.timestamp != null)); long c = ds.count(); cdr.show(2); ds.show(2); System.out.println("cnt=" + c); } // CDR.java public class CDR implements java.io.Serializable { public java.sql.Date timestamp; } // CDR_SAMPLE.csv timestamp 2017-10-29T02:37:07.815Z 2017-10-29T02:38:07.815Z {code} result {code} +--------------------+ | timestamp| +--------------------+ |2017-10-29 11:37:...| |2017-10-29 11:38:...| +--------------------+ +---------+ |timestamp| +---------+ +---------+ c=0 {code} > Dataset with Java Beans for java.sql.Date produces incorrect result > ------------------------------------------------------------------- > > Key: SPARK-22935 > URL: https://issues.apache.org/jira/browse/SPARK-22935 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.2.1, 2.3.0 > Reporter: Kazuaki Ishizaki > > The following code prints {{cnt=0}}. The value of {{c}} must be 2. {{ds}} must > not be {{empty}}. > This occurs both with and without whole-stage codegen. 
> {code} > public void SPARK22935() { > Dataset<CDR> cdr = spark > .read() > .format("csv") > .option("header", "true") > .option("inferSchema", "true") > .option("delimiter", ";") > .csv("CDR_SAMPLE.csv") > .as(Encoders.bean(CDR.class)); > Dataset<CDR> ds = cdr.filter((FilterFunction<CDR>) x -> (x.timestamp != > null)); > long c = ds.count(); > cdr.show(2); > ds.show(2); > System.out.println("cnt=" + c); > } > // CDR.java > public class CDR implements java.io.Serializable { > public java.sql.Date timestamp; > } > // CDR_SAMPLE.csv > timestamp > 2017-10-29T02:37:07.815Z > 2017-10-29T02:38:07.815Z > {code} > result > {code} > +--------------------+ > | timestamp| > +--------------------+ > |2017-10-29 11:37:...| > |2017-10-29 11:38:...| > +--------------------+ > +---------+ > |timestamp| > +---------+ > +---------+ > cnt=0 > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org