Repository: spark Updated Branches: refs/heads/master 681387b2d -> 091f81e1f
[SPARK-15762][SQL] Cache Metadata & StructType hashCodes; use singleton Metadata.empty We should cache `Metadata.hashCode` and use a singleton for `Metadata.empty` because calculating metadata hashCodes appears to be a bottleneck for certain workloads. We should also cache `StructType.hashCode`. In an optimizer stress-test benchmark run by ericl, these `hashCode` calls accounted for roughly 40% of the total CPU time and this bottleneck was completely eliminated by the caching added by this patch. Author: Josh Rosen <joshro...@databricks.com> Closes #13504 from JoshRosen/metadata-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/091f81e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/091f81e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/091f81e1 Branch: refs/heads/master Commit: 091f81e1f7ef1581376c71e3872ce06f4c1713bd Parents: 681387b Author: Josh Rosen <joshro...@databricks.com> Authored: Sat Jun 4 14:14:50 2016 -0700 Committer: Josh Rosen <joshro...@databricks.com> Committed: Sat Jun 4 14:14:50 2016 -0700 ---------------------------------------------------------------------- .../src/main/scala/org/apache/spark/sql/types/Metadata.scala | 7 +++++-- .../main/scala/org/apache/spark/sql/types/StructType.scala | 3 ++- 2 files changed, 7 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/091f81e1/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala index 1fb2e24..657bd86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Metadata.scala @@ -104,7 +104,8 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any]) } } - override def hashCode: Int = Metadata.hash(this) + private lazy val _hashCode: Int = Metadata.hash(this) + override def hashCode: Int = _hashCode private def get[T](key: String): T = { map(key).asInstanceOf[T] @@ -115,8 +116,10 @@ sealed class Metadata private[types] (private[types] val map: Map[String, Any]) object Metadata { + private[this] val _empty = new Metadata(Map.empty) + /** Returns an empty Metadata. */ - def empty: Metadata = new Metadata(Map.empty) + def empty: Metadata = _empty /** Creates a Metadata instance from JSON. */ def fromJson(json: String): Metadata = { http://git-wip-us.apache.org/repos/asf/spark/blob/091f81e1/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala index fd2b524..9a92373 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/StructType.scala @@ -112,7 +112,8 @@ case class StructType(fields: Array[StructField]) extends DataType with Seq[Stru } } - override def hashCode(): Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]]) + private lazy val _hashCode: Int = java.util.Arrays.hashCode(fields.asInstanceOf[Array[AnyRef]]) + override def hashCode(): Int = _hashCode /** * Creates a new [[StructType]] by adding a new field. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org