Github user gengliangwang commented on a diff in the pull request:

https://github.com/apache/spark/pull/22374#discussion_r216593835

--- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVDataSource.scala ---
@@ -243,17 +243,21 @@ object TextInputCSVDataSource extends CSVDataSource {
      parsedOptions: CSVOptions): StructType = maybeFirstLine match {
    case Some(firstLine) =>
      val firstRow = new CsvParser(parsedOptions.asParserSettings).parseLine(firstLine)
-      val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis
-      val header = makeSafeHeader(firstRow, caseSensitive, parsedOptions)
-      val sampled: Dataset[String] = CSVUtils.sample(csv, parsedOptions)
-      val tokenRDD = sampled.rdd.mapPartitions { iter =>
-        val filteredLines = CSVUtils.filterCommentAndEmpty(iter, parsedOptions)
-        val linesWithoutHeader =
-          CSVUtils.filterHeaderLine(filteredLines, firstLine, parsedOptions)
-        val parser = new CsvParser(parsedOptions.asParserSettings)
-        linesWithoutHeader.map(parser.parseLine)
+      if (firstRow != null) {
--- End diff --

Can we simplify the code as

```
maybeFirstLine.map(new CsvParser(parsedOptions.asParserSettings).parseLine(_)) match {
  case Some(firstRow) if firstRow != null =>
  case _ =>
```
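To illustrate the shape being suggested, here is a minimal, self-contained Scala sketch of mapping an `Option` through a parser and then matching once with a null guard, instead of nesting an `if` inside the `Some` case. `toyParseLine` and `describe` are hypothetical stand-ins for this sketch only; the real code in the diff uses Univocity's `CsvParser.parseLine`, whose possible null result is exactly what the `if (firstRow != null)` check guards against.

```scala
// Sketch of the Option.map + guarded-match idiom from the review comment.
// toyParseLine stands in for CsvParser.parseLine, which may return null.
object OptionMatchSketch {
  def toyParseLine(line: String): Array[String] =
    if (line.trim.isEmpty) null else line.split(",").map(_.trim)

  def describe(maybeFirstLine: Option[String]): String =
    maybeFirstLine.map(toyParseLine) match {
      // Only taken when a line was present AND the parser produced a row.
      case Some(firstRow) if firstRow != null =>
        s"header with ${firstRow.length} columns"
      // Covers both None and a null parse result in one branch.
      case _ =>
        "no usable header"
    }

  def main(args: Array[String]): Unit = {
    println(describe(Some("a,b,c"))) // header with 3 columns
    println(describe(Some("   ")))   // no usable header (parser returned null)
    println(describe(None))          // no usable header
  }
}
```

The appeal of this shape is that the null check becomes a pattern guard, so the happy path and both failure modes are handled by a single `match` rather than an extra level of `if` nesting inside the `Some` case.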