Repository: spark Updated Branches: refs/heads/master 386127377 -> ae226283e
[SQL][MINOR] use stricter type parameter to make it clear that parquet reader returns UnsafeRow ## What changes were proposed in this pull request? a small code style change, it's better to make the type parameter more accurate. ## How was this patch tested? N/A Author: Wenchen Fan <wenc...@databricks.com> Closes #14458 from cloud-fan/parquet. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ae226283 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ae226283 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ae226283 Branch: refs/heads/master Commit: ae226283e19ce396216c73b0ae2470efa122b65b Parents: 3861273 Author: Wenchen Fan <wenc...@databricks.com> Authored: Wed Aug 3 08:23:26 2016 +0800 Committer: Cheng Lian <l...@databricks.com> Committed: Wed Aug 3 08:23:26 2016 +0800 ---------------------------------------------------------------------- .../execution/datasources/parquet/ParquetFileFormat.scala | 4 ++-- .../datasources/parquet/ParquetReadSupport.scala | 10 +++++----- .../datasources/parquet/ParquetRecordMaterializer.scala | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala index 772e031..c3e75f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala @@ -370,11 +370,11 @@ private[sql] class ParquetFileFormat logDebug(s"Falling back to parquet-mr") val reader = pushed match { case Some(filter) => - new ParquetRecordReader[InternalRow]( + new ParquetRecordReader[UnsafeRow]( new ParquetReadSupport, FilterCompat.get(filter, null)) case _ => - new ParquetRecordReader[InternalRow](new ParquetReadSupport) + new ParquetRecordReader[UnsafeRow](new ParquetReadSupport) } reader.initialize(split, hadoopAttemptContext) reader http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala index 8a2e0d7..f1a35dd 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala @@ -29,12 +29,12 @@ import org.apache.parquet.schema._ import org.apache.parquet.schema.Type.Repetition import org.apache.spark.internal.Logging -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.types._ /** * A Parquet [[ReadSupport]] implementation for reading Parquet records as Catalyst - * [[InternalRow]]s. + * [[UnsafeRow]]s. * * The API interface of [[ReadSupport]] is a little bit over complicated because of historical * reasons. In older versions of parquet-mr (say 1.6.0rc3 and prior), [[ReadSupport]] need to be @@ -48,7 +48,7 @@ import org.apache.spark.sql.types._ * Due to this reason, we no longer rely on [[ReadContext]] to pass requested schema from [[init()]] * to [[prepareForRead()]], but use a private `var` for simplicity. */ -private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with Logging { +private[parquet] class ParquetReadSupport extends ReadSupport[UnsafeRow] with Logging { private var catalystRequestedSchema: StructType = _ /** @@ -72,13 +72,13 @@ private[parquet] class ParquetReadSupport extends ReadSupport[InternalRow] with /** * Called on executor side after [[init()]], before instantiating actual Parquet record readers. * Responsible for instantiating [[RecordMaterializer]], which is used for converting Parquet - * records to Catalyst [[InternalRow]]s. + * records to Catalyst [[UnsafeRow]]s. */ override def prepareForRead( conf: Configuration, keyValueMetaData: JMap[String, String], fileSchema: MessageType, - readContext: ReadContext): RecordMaterializer[InternalRow] = { + readContext: ReadContext): RecordMaterializer[UnsafeRow] = { log.debug(s"Preparing for read Parquet file with message type: $fileSchema") val parquetRequestedSchema = readContext.getRequestedSchema http://git-wip-us.apache.org/repos/asf/spark/blob/ae226283/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala index d12e780..4e49a0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRecordMaterializer.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet import org.apache.parquet.io.api.{GroupConverter, RecordMaterializer} import org.apache.parquet.schema.MessageType -import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.types.StructType /** @@ -32,12 +32,12 @@ import org.apache.spark.sql.types.StructType */ private[parquet] class ParquetRecordMaterializer( parquetSchema: MessageType, catalystSchema: StructType, schemaConverter: ParquetSchemaConverter) - extends RecordMaterializer[InternalRow] { + extends RecordMaterializer[UnsafeRow] { private val rootConverter = new ParquetRowConverter(schemaConverter, parquetSchema, catalystSchema, NoopUpdater) - override def getCurrentRecord: InternalRow = rootConverter.currentRecord + override def getCurrentRecord: UnsafeRow = rootConverter.currentRecord override def getRootConverter: GroupConverter = rootConverter } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org