Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21657#discussion_r202528267
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala ---
    @@ -38,24 +38,29 @@ class UnivocityParser(
         requiredSchema: StructType,
         val options: CSVOptions) extends Logging {
       require(requiredSchema.toSet.subsetOf(dataSchema.toSet),
    -    "requiredSchema should be the subset of schema.")
    +    s"requiredSchema (${requiredSchema.catalogString}) should be the 
subset of " +
    +      s"dataSchema (${dataSchema.catalogString}).")
     
       def this(schema: StructType, options: CSVOptions) = this(schema, schema, 
options)
     
       // A `ValueConverter` is responsible for converting the given value to a 
desired type.
       private type ValueConverter = String => Any
     
    +  // This index is used to reorder parsed tokens
    +  private val tokenIndexArr =
    +    requiredSchema.map(f => 
java.lang.Integer.valueOf(dataSchema.indexOf(f))).toArray
    +
       val tokenizer = {
         val parserSetting = options.asParserSettings
         if (options.columnPruning && requiredSchema.length < 
dataSchema.length) {
    -      val tokenIndexArr = requiredSchema.map(f => 
java.lang.Integer.valueOf(dataSchema.indexOf(f)))
           parserSetting.selectIndexes(tokenIndexArr: _*)
         }
         new CsvParser(parserSetting)
       }
    -  private val schema = if (options.columnPruning) requiredSchema else 
dataSchema
     
    -  private val row = new GenericInternalRow(schema.length)
    +  private val parsedSchema = if (options.columnPruning) requiredSchema 
else dataSchema
    --- End diff --
    
    Add a comment like:
    
    > // When column pruning is enabled, the parser only parses the required columns based on their positions in the data schema.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to