Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19943#discussion_r159222343
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala ---
    @@ -110,4 +107,22 @@ object OrcUtils extends Logging {
           }
         }
       }
    +
    +  /**
    +   * Return a fixed ORC schema with data schema information, if needed.
    +   * The schema inside old ORC files might consist of invalid column names like '_col0'.
    +   */
    +  def getFixedTypeDescription(
    +      schema: TypeDescription,
    +      dataSchema: StructType): TypeDescription = {
    +    if (schema.getFieldNames.asScala.forall(_.startsWith("_col"))) {
    +      var schemaString = schema.toString
    +      dataSchema.zipWithIndex.foreach { case (field: StructField, index: 
Int) =>
    --- End diff --
    
    Shall we make sure `schema` and `dataSchema` have the same number of fields?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to