Github user MaxGekk commented on a diff in the pull request: https://github.com/apache/spark/pull/20727#discussion_r172330645 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/HadoopFileLinesReader.scala --- @@ -42,7 +52,12 @@ class HadoopFileLinesReader( Array.empty) val attemptId = new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0) val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) - val reader = new LineRecordReader() + val reader = if (lineSeparator != "\n") { + new LineRecordReader(lineSeparator.getBytes("UTF-8")) + } else { + // This behavior follows Hive. `\n` covers `\r`, `\r\n` and `\n`. --- End diff -- The case where lineSeparator is '\n' also covers '\r' and '\r\n', which does not look right.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org