Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/8553#discussion_r40277489 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystReadSupport.scala --- @@ -69,36 +97,6 @@ private[parquet] class CatalystReadSupport extends ReadSupport[InternalRow] with new CatalystRecordMaterializer(parquetRequestedSchema, catalystRequestedSchema) } - - // Called before `prepareForRead()` when initializing Parquet record reader. - override def init(context: InitContext): ReadContext = { - val conf = { - // scalastyle:off jobcontext - context.getConfiguration - // scalastyle:on jobcontext - } - - // If the target file was written by Spark SQL, we should be able to find a serialized Catalyst - // schema of this file from its metadata. - val maybeRowSchema = Option(conf.get(RowWriteSupport.SPARK_ROW_SCHEMA)) - - // Optional schema of requested columns, in the form of a string serialized from a Catalyst - // `StructType` containing all requested columns. - val maybeRequestedSchema = Option(conf.get(CatalystReadSupport.SPARK_ROW_REQUESTED_SCHEMA)) - - val parquetRequestedSchema = - maybeRequestedSchema.fold(context.getFileSchema) { schemaString => - val catalystRequestedSchema = StructType.fromString(schemaString) - CatalystReadSupport.clipParquetSchema(context.getFileSchema, catalystRequestedSchema) - } - - val metadata = - Map.empty[String, String] ++ - maybeRequestedSchema.map(CatalystReadSupport.SPARK_ROW_REQUESTED_SCHEMA -> _) ++ - maybeRowSchema.map(RowWriteSupport.SPARK_ROW_SCHEMA -> _) --- End diff -- Why did we pass in the `maybeRowSchema` before? Seems it was not used by `prepareForRead`.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastructure@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org