[ https://issues.apache.org/jira/browse/SPARK-31557?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17530477#comment-17530477 ]
Apache Spark commented on SPARK-31557:
--------------------------------------

User 'bersprockets' has created a pull request for this issue:
https://github.com/apache/spark/pull/28345

> Legacy parser incorrectly interprets pre-Gregorian dates
> --------------------------------------------------------
>
>                 Key: SPARK-31557
>                 URL: https://issues.apache.org/jira/browse/SPARK-31557
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.0.0, 3.1.0
>            Reporter: Bruce Robbins
>            Assignee: Bruce Robbins
>            Priority: Major
>             Fix For: 3.0.0
>
>
> With CSV:
> {noformat}
> scala> sql("set spark.sql.legacy.timeParserPolicy=LEGACY")
> res0: org.apache.spark.sql.DataFrame = [key: string, value: string]
> scala> val seq = Seq("0002-01-01", "1000-01-01", "1500-01-01",
> "1800-01-01").map(x => s"$x,$x")
> seq: Seq[String] = List(0002-01-01,0002-01-01, 1000-01-01,1000-01-01,
> 1500-01-01,1500-01-01, 1800-01-01,1800-01-01)
> scala> val ds = seq.toDF("value").as[String]
> ds: org.apache.spark.sql.Dataset[String] = [value: string]
> scala> spark.read.schema("expected STRING, actual DATE").csv(ds).show
> +----------+----------+
> |  expected|    actual|
> +----------+----------+
> |0002-01-01|0001-12-30|
> |1000-01-01|1000-01-06|
> |1500-01-01|1500-01-10|
> |1800-01-01|1800-01-01|
> +----------+----------+
> scala>
> {noformat}
> Similarly, with JSON:
> {noformat}
> scala> sql("set spark.sql.legacy.timeParserPolicy=LEGACY")
> res0: org.apache.spark.sql.DataFrame = [key: string, value: string]
> scala> val seq = Seq("0002-01-01", "1000-01-01", "1500-01-01",
> "1800-01-01").map { x =>
>   s"""{"expected": "$x", "actual": "$x"}"""
> }
>      |      | seq: Seq[String] = List({"expected": "0002-01-01", "actual":
> "0002-01-01"}, {"expected": "1000-01-01", "actual": "1000-01-01"},
> {"expected": "1500-01-01", "actual": "1500-01-01"}, {"expected":
> "1800-01-01", "actual": "1800-01-01"})
> scala>
> scala> val ds = seq.toDF("value").as[String]
> ds: org.apache.spark.sql.Dataset[String] = [value: string]
> scala> spark.read.schema("expected STRING, actual DATE").json(ds).show
> +----------+----------+
> |  expected|    actual|
> +----------+----------+
> |0002-01-01|0001-12-30|
> |1000-01-01|1000-01-06|
> |1500-01-01|1500-01-10|
> |1800-01-01|1800-01-01|
> +----------+----------+
> scala>
> {noformat}

--
This message was sent by Atlassian Jira
(v8.20.7#820007)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org