Github user MaxGekk commented on a diff in the pull request: https://github.com/apache/spark/pull/21657#discussion_r201042748 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala --- @@ -38,24 +38,28 @@ class UnivocityParser( requiredSchema: StructType, val options: CSVOptions) extends Logging { require(requiredSchema.toSet.subsetOf(dataSchema.toSet), - "requiredSchema should be the subset of schema.") + "requiredSchema should be the subset of dataSchema.") def this(schema: StructType, options: CSVOptions) = this(schema, schema, options) // A `ValueConverter` is responsible for converting the given value to a desired type. private type ValueConverter = String => Any + // This index is used to reorder parsed tokens + private val tokenIndexArr = + requiredSchema.map(f => java.lang.Integer.valueOf(dataSchema.indexOf(f))).toArray --- End diff -- Just in case, we can optimize memory usage here. The array is used only when the `options.columnPruning` flag is enabled, so we can create an empty array (or null) if `options.columnPruning` is set to `false`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org