xuanyuanking commented on a change in pull request #27830: [SPARK-31030][SQL] Backward Compatibility for Parsing and formatting Datetime URL: https://github.com/apache/spark/pull/27830#discussion_r390299797
########## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala ########## @@ -906,4 +906,45 @@ object DateTimeUtils { val days = period.getDays new CalendarInterval(months, days, 0) } + + /** + * Since the Proleptic Gregorian calendar is the de-facto calendar worldwide, as well as the chosen + * one in the ANSI SQL standard, Spark 3.0 switches to it by using DateTimeFormatter classes. + * However, the breaking changes between SimpleDateFormat and DateTimeFormatter pattern strings + * will also break the backward compatibility of Spark 2.4 and earlier when parsing datetime. + * This function converts all incompatible patterns for the new parser in Spark 3.0. See more + * details in SPARK-31030. + * + * @param pattern The input pattern. + * @return The pattern for the new parser + */ + def convertIncompatiblePattern(pattern: String): String = { + val eraDesignatorContained = pattern.split("'").zipWithIndex.exists { + case (patternPart, index) => + // Text can be quoted using single quotes, we only check the non-quote parts. + index % 2 == 0 && patternPart.contains("G") + } + pattern.split("'").zipWithIndex.map { + case (patternPart, index) => + if (index % 2 == 0) { + // The meaning of 'u' was day number of week in Java 7, it was changed to year in Java 8. + // Substitute 'u' to 'e' and use Java 8 parser to parse the string. If parsable, return + // the result; otherwise, fall back to 'u', and then use the legacy Java 7 parser to + // parse. When it is successfully parsed, throw an exception and ask users to change + // the pattern strings or turn on the legacy mode; otherwise, return NULL as what Spark + // 2.4 does. + val res = patternPart.replace("u", "e") + // In Java 8 API, 'u' supports negative years. We substitute 'y' to 'u' here for keeping Review comment: Thanks, done in 5382508. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org