Github user MaxGekk commented on a diff in the pull request: https://github.com/apache/spark/pull/21657#discussion_r198863991 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/CSVFileFormat.scala --- @@ -131,20 +132,30 @@ class CSVFileFormat extends TextBasedFileFormat with DataSourceRegister { ) } val caseSensitive = sparkSession.sessionState.conf.caseSensitiveAnalysis + val columnPruning = sparkSession.sessionState.conf.csvColumnPruning (file: PartitionedFile) => { val conf = broadcastedHadoopConf.value.value val parser = new UnivocityParser( StructType(dataSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)), StructType(requiredSchema.filterNot(_.name == parsedOptions.columnNameOfCorruptRecord)), parsedOptions) - CSVDataSource(parsedOptions).readFile( + val inputRows = CSVDataSource(parsedOptions).readFile( conf, file, parser, requiredSchema, dataSchema, caseSensitive) + + if (columnPruning) { + inputRows + } else { + val inputAttrs = dataSchema.toAttributes --- End diff -- It seems the bug is not related to the column pruning feature, and most likely it was present in previous versions. Should the fix be backported to the 2.3 branch?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org