Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20140#discussion_r184870477
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchema.scala
 ---
    @@ -140,14 +141,23 @@ private[csv] object CSVInferSchema {
       private def tryParseDouble(field: String, options: CSVOptions): DataType 
= {
         if ((allCatch opt field.toDouble).isDefined || isInfOrNan(field, 
options)) {
           DoubleType
    +    } else {
    +      tryParseDate(field, options)
    +    }
    +  }
    +
    +  private def tryParseDate(field: String, options: CSVOptions): DataType = 
{
    +    // This case infers a custom `dateFormat` is set.
    +    if ((allCatch opt options.dateFormatter.parse(field)).isDefined) {
    +      DateType
         } else {
           tryParseTimestamp(field, options)
         }
       }
     
       private def tryParseTimestamp(field: String, options: CSVOptions): 
DataType = {
    -    // This case infers a custom `dataFormat` is set.
    -    if ((allCatch opt options.timestampFormat.parse(field)).isDefined) {
    +    // This case infers a custom `timestampFormat` is set.
    +    if ((allCatch opt options.timestampFormatter.parse(field)).isDefined) {
    --- End diff --
    
    It would probably be preferable to add a configuration option to control 
this behaviour in this case.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to