Jonathancui123 commented on code in PR #37147: URL: https://github.com/apache/spark/pull/37147#discussion_r918421341
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala:
##########
@@ -222,7 +226,11 @@ class UnivocityParser(
       } catch {
         case NonFatal(e) =>
           // If fails to parse, then tries the way used in 2.0 and 1.x for backwards
-          // compatibility.
+          // compatibility only if no custom pattern has been set. If there is a custom pattern,
+          // fail since it may be different from the default pattern.
+          if (options.dateFormatInRead.isDefined) {
+            throw e
+          }

Review Comment:
This change makes sense to me. Thanks Ivan :))

##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala:
##########
@@ -2788,6 +2788,47 @@ abstract class CSVSuite
       }
     }
   }
+
+  test("SPARK-39731: Correctly parse dates with yyyyMMdd pattern") {
+    withTempPath { path =>
+      Seq(
+        "1,2020011,2020011",
+        "2,20201203,20201203").toDF("data")
+        .repartition(1)
+        .write.text(path.getAbsolutePath)
+      val schema = new StructType()
+        .add("id", IntegerType)
+        .add("date", DateType)
+        .add("ts", TimestampType)
+      val output = spark.read
+        .schema(schema)
+        .option("dateFormat", "yyyyMMdd")
+        .option("timestampFormat", "yyyyMMdd")
+        .csv(path.getAbsolutePath)
+
+      def check(mode: String, res: Seq[Row]): Unit = {
+        withSQLConf(SQLConf.LEGACY_TIME_PARSER_POLICY.key -> mode) {
+          checkAnswer(output, res)
+        }
+      }
+
+      check(
+        "legacy",
+        Seq(
+          Row(1, Date.valueOf("2020-01-01"), Timestamp.valueOf("2020-01-01 00:00:00")),
+          Row(2, Date.valueOf("2020-12-03"), Timestamp.valueOf("2020-12-03 00:00:00"))
+        )
+      )
+
+      check(
+        "corrected",
+        Seq(
+          Row(1, null, null),
+          Row(2, Date.valueOf("2020-12-03"), Timestamp.valueOf("2020-12-03 00:00:00"))
+        )

Review Comment:
For completeness, would you consider adding a check for `LEGACY_TIME_PARSER_POLICY` = `EXCEPTION`? Similar to the following?
https://github.com/apache/spark/blob/1193ce78d3efcbe1395305b4b7deb0a195fa09d9/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala#L2598-L2601

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org