cloud-fan commented on code in PR #37933: URL: https://github.com/apache/spark/pull/37933#discussion_r977085759
########## sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVInferSchema.scala: ########## @@ -233,7 +238,39 @@ class CSVInferSchema(val options: CSVOptions) extends Serializable { * is compatible with both input data types. */ private def compatibleType(t1: DataType, t2: DataType): Option[DataType] = { - TypeCoercion.findTightestCommonType(t1, t2).orElse(findCompatibleTypeForCSV(t1, t2)) + (t1, t2) match { + case (DateType, TimestampType) | (DateType, TimestampNTZType) | + (TimestampNTZType, DateType) | (TimestampType, DateType) => + // For a column containing a mixture of dates and timestamps + // infer it as timestamp type if its dates can be inferred as timestamp type + // otherwise infer it as StringType Review Comment: Let's enrich the comment a bit more: ``` This only happens when the timestamp pattern is not specified, as the default timestamp parser is very lenient and can parse date strings as well. ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org