Repository: spark Updated Branches: refs/heads/master e6d1406ab -> 73d57754d
[SPARK-6326][SQL] Improve castStruct to be faster Current `castStruct` should be very slow. This pr slightly improves it. Author: Liang-Chi Hsieh <vii...@gmail.com> Closes #5017 from viirya/faster_caststruct and squashes the following commits: 385d5b0 [Liang-Chi Hsieh] Further improved. 746fcfb [Liang-Chi Hsieh] Make castStruct faster. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/73d57754 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/73d57754 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/73d57754 Branch: refs/heads/master Commit: 73d57754dd23d84331c10355338a4240b3ac5fee Parents: e6d1406 Author: Liang-Chi Hsieh <vii...@gmail.com> Authored: Wed Mar 25 17:52:23 2015 -0700 Committer: Michael Armbrust <mich...@databricks.com> Committed: Wed Mar 25 17:52:23 2015 -0700 ---------------------------------------------------------------------- .../apache/spark/sql/catalyst/expressions/Cast.scala | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/73d57754/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 9bde74a..31f1a5f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -394,10 +394,17 @@ case class Cast(child: Expression, dataType: DataType) extends UnaryExpression w val casts = from.fields.zip(to.fields).map { case (fromField, toField) => cast(fromField.dataType, toField.dataType) } - // TODO: This is very slow! - buildCast[Row](_, row => Row(row.toSeq.zip(casts).map { - case (v, cast) => if (v == null) null else cast(v) - }: _*)) + // TODO: Could be faster? + val newRow = new GenericMutableRow(from.fields.size) + buildCast[Row](_, row => { + var i = 0 + while (i < row.length) { + val v = row(i) + newRow.update(i, if (v == null) null else casts(i)(v)) + i += 1 + } + newRow.copy() + }) } private[this] def cast(from: DataType, to: DataType): Any => Any = to match { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org