Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20937#discussion_r183227312
  
    --- Diff: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/TextOptions.scala
 ---
    @@ -41,19 +41,25 @@ private[text] class TextOptions(@transient private val 
parameters: CaseInsensiti
        */
       val wholeText = parameters.getOrElse(WHOLETEXT, "false").toBoolean
     
    -  private val lineSeparator: Option[String] = 
parameters.get(LINE_SEPARATOR).map { sep =>
    -    require(sep.nonEmpty, s"'$LINE_SEPARATOR' cannot be an empty string.")
    -    sep
    +  val encoding: Option[String] = parameters.get(ENCODING)
    +
    +  val lineSeparator: Option[String] = parameters.get(LINE_SEPARATOR).map { 
lineSep =>
    +    require(lineSep.nonEmpty, s"'$LINE_SEPARATOR' cannot be an empty 
string.")
    +
    +    lineSep
       }
    +
       // Note that the option 'lineSep' uses a different default value in read 
and write.
    -  val lineSeparatorInRead: Option[Array[Byte]] =
    -    lineSeparator.map(_.getBytes(StandardCharsets.UTF_8))
    +  val lineSeparatorInRead: Option[Array[Byte]] = lineSeparator.map { 
lineSep =>
    +    lineSep.getBytes(encoding.getOrElse("UTF-8"))
    +  }
       val lineSeparatorInWrite: Array[Byte] =
    -    lineSeparatorInRead.getOrElse("\n".getBytes(StandardCharsets.UTF_8))
    +    lineSeparatorInRead.getOrElse("\n".getBytes("UTF-8"))
    --- End diff --
    
    Not a big deal at all, but I was just wondering if there was a reason to 
choose `"UTF-8"` over `StandardCharsets.UTF_8`, because I think 
`StandardCharsets.UTF_8` is slightly better.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to