Github user gengliangwang commented on a diff in the pull request:
https://github.com/apache/spark/pull/22374#discussion_r216593835
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala
---
@@ -243,17 +243,21 @@ object TextInputCSVDataSource extends CSVDataSource {
parsedOptions: CSVOptions): StructType = maybeFirstLine match {
case Some(firstLine) =>
val firstRow = new
CsvParser(parsedOptions.asParserSettings).parseLine(firstLine)
- val caseSensitive =
sparkSession.sessionState.conf.caseSensitiveAnalysis
- val header = makeSafeHeader(firstRow, caseSensitive, parsedOptions)
- val sampled: Dataset[String] = CSVUtils.sample(csv, parsedOptions)
- val tokenRDD = sampled.rdd.mapPartitions { iter =>
- val filteredLines = CSVUtils.filterCommentAndEmpty(iter,
parsedOptions)
- val linesWithoutHeader =
- CSVUtils.filterHeaderLine(filteredLines, firstLine,
parsedOptions)
- val parser = new CsvParser(parsedOptions.asParserSettings)
- linesWithoutHeader.map(parser.parseLine)
+ if (firstRow != null) {
--- End diff --
Can we simplify the code as follows?
```
maybeFirstLine.map(new
CsvParser(parsedOptions.asParserSettings).parseLine(_)) match {
case Some(firstRow) if firstRow != null =>
case _ =>
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]