This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 68fa601 [SPARK-27040][SQL] Avoid using unnecessary JoinRow in FileFormat 68fa601 is described below commit 68fa601d62c1e5e7b37a1b7d6b0236019239a00a Author: Gengliang Wang <gengliang.w...@databricks.com> AuthorDate: Mon Mar 4 22:26:11 2019 +0800 [SPARK-27040][SQL] Avoid using unnecessary JoinRow in FileFormat ## What changes were proposed in this pull request? When reading files whose partition schema is empty, there are no partition values to append, so we can pass each data row straight to the converter instead of wrapping it in a `JoinedRow`. ## How was this patch tested? Existing unit tests. Closes #23953 from gengliangwang/avoidJoinRow. Authored-by: Gengliang Wang <gengliang.w...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../apache/spark/sql/execution/datasources/FileFormat.scala | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala index f0b4971..a72a9c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileFormat.scala @@ -132,8 +132,6 @@ trait FileFormat { new (PartitionedFile => Iterator[InternalRow]) with Serializable { private val fullSchema = requiredSchema.toAttributes ++ partitionSchema.toAttributes - private val joinedRow = new JoinedRow() - // Using lazy val to avoid serialization private lazy val appendPartitionColumns = GenerateUnsafeProjection.generate(fullSchema, fullSchema) @@ -145,8 +143,15 @@ trait FileFormat { // Note that we have to apply the converter even though `file.partitionValues` is empty. // This is because the converter is also responsible for converting safe `InternalRow`s into // `UnsafeRow`s. 
- dataReader(file).map { dataRow => - converter(joinedRow(dataRow, file.partitionValues)) + if (partitionSchema.isEmpty) { + dataReader(file).map { dataRow => + converter(dataRow) + } + } else { + val joinedRow = new JoinedRow() + dataReader(file).map { dataRow => + converter(joinedRow(dataRow, file.partitionValues)) + } } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org